Merge pull request #52 from Hestia-Homes/feature/hubspot-as_the_source-of_truth

Feature/hubspot as the source of truth
This commit is contained in:
Jun-te Kim 2025-05-21 15:55:07 +01:00 committed by GitHub
commit ed1a5508d5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 708 additions and 98 deletions

View file

@ -5,7 +5,7 @@ on:
workflow_dispatch:
jobs:
sharepoint-validator:
hubspot-deals-to-db:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v3
@ -24,7 +24,7 @@ jobs:
run: |
pwd
ls -la
poetry run python etl/hubspot_to_invoice.py
poetry run python etl/hubspot_to_invoice_rewrite.py
env:
PYTHONPATH: ${{ github.workspace }}
DATABASE_URL: postgresql://postgres:makingwarmhomes@terraform-20250331175522503500000002.cdgzupxvdyp0.eu-west-2.rds.amazonaws.com:5432/surveyDB

View file

@ -0,0 +1,354 @@
"""Initial table
Revision ID: 427e65da69c1
Revises:
Create Date: 2025-05-14 15:36:08.611971
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
import sqlmodel
# revision identifiers, used by Alembic.
revision: str = '427e65da69c1'
down_revision: Union[str, None] = None
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
"""Upgrade schema."""
# ### commands auto generated by Alembic - please adjust! ###
op.create_table('buildings',
sa.Column('id', sa.Uuid(), nullable=False),
sa.Column('address', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('postcode', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('UPRN', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('landlord_id', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('domna_id', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.PrimaryKeyConstraint('id')
)
op.create_table('companyinfo',
sa.Column('id', sa.Uuid(), nullable=False),
sa.Column('address', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('trading_name', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('post_code', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('fax_number', sqlmodel.sql.sqltypes.AutoString(), nullable=True),
sa.Column('related_party_disclosure', sqlmodel.sql.sqltypes.AutoString(), nullable=True),
sa.PrimaryKeyConstraint('id')
)
op.create_table('door',
sa.Column('id', sa.Uuid(), nullable=False),
sa.Column('no_of_doors', sa.Integer(), nullable=False),
sa.Column('no_of_insulated_doors', sa.Integer(), nullable=False),
sa.Column('u_value_w_m2_k', sqlmodel.sql.sqltypes.AutoString(), nullable=True),
sa.PrimaryKeyConstraint('id')
)
op.create_table('floors',
sa.Column('id', sa.Uuid(), nullable=False),
sa.Column('floor_type', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('ground_floor_construction', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('ground_floor_insulation_type', sqlmodel.sql.sqltypes.AutoString(), nullable=True),
sa.Column('floor_insulation_thickness_mm', sa.Float(), nullable=True),
sa.Column('u_value_known', sa.Boolean(), nullable=False),
sa.PrimaryKeyConstraint('id')
)
op.create_table('fluegasheatrecoverysystem',
sa.Column('id', sa.Uuid(), nullable=False),
sa.Column('fghrs_present', sa.Boolean(), nullable=False),
sa.PrimaryKeyConstraint('id')
)
op.create_table('heatingsystemcontrols',
sa.Column('id', sa.Uuid(), nullable=False),
sa.Column('control_type', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('flue_type', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('fan_assisted_flue', sa.Boolean(), nullable=False),
sa.Column('heat_emitter_type', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('electricity_meter_type', sqlmodel.sql.sqltypes.AutoString(), nullable=True),
sa.Column('mains_gas_available', sa.Boolean(), nullable=True),
sa.PrimaryKeyConstraint('id')
)
op.create_table('heatingtype',
sa.Column('id', sa.Uuid(), nullable=False),
sa.Column('heating_type', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('fuel_type', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.PrimaryKeyConstraint('id')
)
op.create_table('hotwatercylinder',
sa.Column('id', sa.Uuid(), nullable=False),
sa.Column('volume', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('insulation_type', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('insulation_thickness', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('thermostat', sa.Boolean(), nullable=False),
sa.PrimaryKeyConstraint('id')
)
op.create_table('insulation',
sa.Column('id', sa.Uuid(), nullable=False),
sa.Column('type', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.PrimaryKeyConstraint('id')
)
op.create_table('lighting',
sa.Column('id', sa.Uuid(), nullable=False),
sa.Column('total_no_of_light_fittings', sa.Integer(), nullable=False),
sa.Column('total_no_of_lel_fittings', sa.Integer(), nullable=False),
sa.PrimaryKeyConstraint('id')
)
op.create_table('otherdetails',
sa.Column('id', sa.Uuid(), nullable=False),
sa.Column('electricity_meter_type', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('main_gas_avalible', sa.Boolean(), nullable=False),
sa.PrimaryKeyConstraint('id')
)
op.create_table('photovoltaicpanel',
sa.Column('id', sa.Uuid(), nullable=False),
sa.Column('pvs_are_connected_to_dwelling_electricity_meter', sa.Boolean(), nullable=False),
sa.Column('percentage_of_external_roof_area_with_pvs', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.PrimaryKeyConstraint('id')
)
op.create_table('presitenotessummaryinfo',
sa.Column('id', sa.Uuid(), nullable=False),
sa.Column('reference_number', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('epc_language', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('uprn', sqlmodel.sql.sqltypes.AutoString(), nullable=True),
sa.Column('postcode', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('region', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('address', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('town', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('county', sqlmodel.sql.sqltypes.AutoString(), nullable=True),
sa.Column('property_tenure', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('transaction_type', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('inspection_date', sa.DateTime(), nullable=False),
sa.Column('current_sap', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('potential_sap', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('current_ei', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('potential_ei', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('current_annual_emissions', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('current_annual_emission_including_0925_multiplayer', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('current_annual_energy_costs', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.PrimaryKeyConstraint('id')
)
op.create_table('roofs',
sa.Column('id', sa.Uuid(), nullable=False),
sa.Column('construction', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('insulation_type', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('insulation_thickness', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('u_value_known', sa.Boolean(), nullable=False),
sa.PrimaryKeyConstraint('id')
)
op.create_table('showerandbaths',
sa.Column('id', sa.Uuid(), nullable=False),
sa.Column('no_of_rooms_with_baths_and_or_shower', sa.Integer(), nullable=False),
sa.Column('no_of_rooms_with_mixer_shower_and_no_baths', sa.Integer(), nullable=False),
sa.Column('no_of_rooms_with_mixer_shower_and_baths', sa.Integer(), nullable=False),
sa.PrimaryKeyConstraint('id')
)
op.create_table('solarwaterheating',
sa.Column('id', sa.Uuid(), nullable=False),
sa.Column('solar_water_heating_details_known', sa.Boolean(), nullable=False),
sa.PrimaryKeyConstraint('id')
)
op.create_table('ventilationandcooling',
sa.Column('id', sa.Uuid(), nullable=False),
sa.Column('no_of_open_fireplaces', sa.Integer(), nullable=False),
sa.Column('ventilation_type', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('space_cooling_system_present', sa.Boolean(), nullable=False),
sa.PrimaryKeyConstraint('id')
)
op.create_table('walls',
sa.Column('id', sa.Uuid(), nullable=False),
sa.Column('construction', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('insulation', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('insulation_thickness_mm', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('wall_thickness_measured', sa.Boolean(), nullable=False),
sa.Column('wall_thickness_mm', sa.Integer(), nullable=True),
sa.Column('u_value_known', sa.Boolean(), nullable=False),
sa.Column('u_value_w_m2_k', sa.Float(), nullable=True),
sa.Column('dry_lining', sa.Boolean(), nullable=False),
sa.Column('alternative_wall_present', sa.Boolean(), nullable=False),
sa.PrimaryKeyConstraint('id')
)
op.create_table('waterheating',
sa.Column('id', sa.Uuid(), nullable=False),
sa.Column('heating_type', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('fuel_type', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.PrimaryKeyConstraint('id')
)
op.create_table('windturbine',
sa.Column('id', sa.Uuid(), nullable=False),
sa.Column('wind_turbine', sa.Boolean(), nullable=False),
sa.PrimaryKeyConstraint('id')
)
op.create_table('assessorinfo',
sa.Column('id', sa.Uuid(), nullable=False),
sa.Column('accreditation_number', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('name', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('phone_number', sqlmodel.sql.sqltypes.AutoString(), nullable=True),
sa.Column('email_address', sqlmodel.sql.sqltypes.AutoString(), nullable=True),
sa.Column('company_id', sa.Uuid(), nullable=True),
sa.ForeignKeyConstraint(['company_id'], ['companyinfo.id'], ),
sa.PrimaryKeyConstraint('id')
)
op.create_table('heating',
sa.Column('id', sa.Uuid(), nullable=False),
sa.Column('type', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('heating_source', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('efficiency_source', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('heating_fuel', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('brand_name', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('model_name', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('model_qualifer', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('sap_2009_table', sqlmodel.sql.sqltypes.AutoString(), nullable=True),
sa.Column('percentage_of_heated_floor_area_served', sqlmodel.sql.sqltypes.AutoString(), nullable=True),
sa.Column('controls_id', sa.Uuid(), nullable=True),
sa.ForeignKeyConstraint(['controls_id'], ['heatingsystemcontrols.id'], ),
sa.PrimaryKeyConstraint('id')
)
op.create_table('propertydetail',
sa.Column('id', sa.Uuid(), nullable=False),
sa.Column('age_band', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('wall_id', sa.Uuid(), nullable=True),
sa.Column('roof_id', sa.Uuid(), nullable=True),
sa.Column('floor_id', sa.Uuid(), nullable=True),
sa.ForeignKeyConstraint(['floor_id'], ['floors.id'], ),
sa.ForeignKeyConstraint(['roof_id'], ['roofs.id'], ),
sa.ForeignKeyConstraint(['wall_id'], ['walls.id'], ),
sa.PrimaryKeyConstraint('id')
)
op.create_table('dimension',
sa.Column('id', sa.Uuid(), nullable=False),
sa.Column('floor_area_m2', sa.Float(), nullable=False),
sa.Column('room_height_m', sa.Float(), nullable=False),
sa.Column('loss_perimeter_m', sa.Float(), nullable=False),
sa.Column('party_wall_length_m', sa.Float(), nullable=False),
sa.Column('property_detail_id', sa.Uuid(), nullable=True),
sa.ForeignKeyConstraint(['property_detail_id'], ['propertydetail.id'], ),
sa.PrimaryKeyConstraint('id')
)
op.create_table('documents',
sa.Column('id', sa.Uuid(), nullable=False),
sa.Column('assessor_id', sa.Uuid(), nullable=False),
sa.Column('created_at', sa.DateTime(), nullable=False),
sa.Column('document_type', sa.Enum('QUIDOS_PRESITE_NOTE', 'CHARTED_SURVEYOR_REPORT', 'ENERGY_PERFORMANCE_REPORT', 'U_VALUE_CALCULATOR_REPORT', 'OVERWRITING_U_VALUE_DECLARATION_FORM', name='reporttype'), nullable=False),
sa.Column('building_id', sa.Uuid(), nullable=False),
sa.Column('target_table', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('target_id', sa.Uuid(), nullable=False),
sa.ForeignKeyConstraint(['assessor_id'], ['assessorinfo.id'], ),
sa.ForeignKeyConstraint(['building_id'], ['buildings.id'], ),
sa.PrimaryKeyConstraint('id')
)
op.create_table('propertydescription',
sa.Column('id', sa.Uuid(), nullable=False),
sa.Column('built_form', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('detachment_or_position', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('no_of_main_property', sa.Integer(), nullable=False),
sa.Column('no_of_extension_1', sa.Integer(), nullable=True),
sa.Column('no_of_extension_2', sa.Integer(), nullable=True),
sa.Column('no_of_extension_3', sa.Integer(), nullable=True),
sa.Column('no_of_extension_4', sa.Integer(), nullable=True),
sa.Column('no_of_habitable_rooms', sa.Integer(), nullable=False),
sa.Column('no_of_heated_rooms', sa.Integer(), nullable=False),
sa.Column('heated_basement', sa.Boolean(), nullable=False),
sa.Column('conservatory_type', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('percentage_of_draught_proofed', sa.Integer(), nullable=False),
sa.Column('terrain_type', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('conservatory', sa.Boolean(), nullable=False),
sa.Column('main_property_id', sa.Uuid(), nullable=False),
sa.Column('ex1_property_id', sa.Uuid(), nullable=True),
sa.Column('ex2_property_id', sa.Uuid(), nullable=True),
sa.Column('ex3_property_id', sa.Uuid(), nullable=True),
sa.Column('ex4_property_id', sa.Uuid(), nullable=True),
sa.Column('door_id', sa.Uuid(), nullable=True),
sa.Column('ventilation_and_cooling_id', sa.Uuid(), nullable=True),
sa.Column('lighting_id', sa.Uuid(), nullable=True),
sa.Column('water_heating_id', sa.Uuid(), nullable=True),
sa.Column('hot_water_cylinder_id', sa.Uuid(), nullable=True),
sa.Column('solar_water_heating_id', sa.Uuid(), nullable=True),
sa.Column('shower_and_baths_id', sa.Uuid(), nullable=True),
sa.Column('flue_gas_heat_recovery_system_id', sa.Uuid(), nullable=True),
sa.Column('photovoltaic_panel_id', sa.Uuid(), nullable=True),
sa.Column('wind_turbine_id', sa.Uuid(), nullable=True),
sa.Column('other_details_id', sa.Uuid(), nullable=True),
sa.Column('main_heating_id', sa.Uuid(), nullable=True),
sa.Column('main_heating2_id', sa.Uuid(), nullable=True),
sa.Column('secondary_heating_type_id', sa.Uuid(), nullable=True),
sa.ForeignKeyConstraint(['door_id'], ['door.id'], ),
sa.ForeignKeyConstraint(['ex1_property_id'], ['propertydetail.id'], ),
sa.ForeignKeyConstraint(['ex2_property_id'], ['propertydetail.id'], ),
sa.ForeignKeyConstraint(['ex3_property_id'], ['propertydetail.id'], ),
sa.ForeignKeyConstraint(['ex4_property_id'], ['propertydetail.id'], ),
sa.ForeignKeyConstraint(['flue_gas_heat_recovery_system_id'], ['fluegasheatrecoverysystem.id'], ),
sa.ForeignKeyConstraint(['hot_water_cylinder_id'], ['hotwatercylinder.id'], ),
sa.ForeignKeyConstraint(['lighting_id'], ['lighting.id'], ),
sa.ForeignKeyConstraint(['main_heating2_id'], ['heating.id'], ),
sa.ForeignKeyConstraint(['main_heating_id'], ['heating.id'], ),
sa.ForeignKeyConstraint(['main_property_id'], ['propertydetail.id'], ),
sa.ForeignKeyConstraint(['other_details_id'], ['otherdetails.id'], ),
sa.ForeignKeyConstraint(['photovoltaic_panel_id'], ['photovoltaicpanel.id'], ),
sa.ForeignKeyConstraint(['secondary_heating_type_id'], ['heatingtype.id'], ),
sa.ForeignKeyConstraint(['shower_and_baths_id'], ['showerandbaths.id'], ),
sa.ForeignKeyConstraint(['solar_water_heating_id'], ['solarwaterheating.id'], ),
sa.ForeignKeyConstraint(['ventilation_and_cooling_id'], ['ventilationandcooling.id'], ),
sa.ForeignKeyConstraint(['water_heating_id'], ['waterheating.id'], ),
sa.ForeignKeyConstraint(['wind_turbine_id'], ['windturbine.id'], ),
sa.PrimaryKeyConstraint('id')
)
op.create_table('windows',
sa.Column('id', sa.Uuid(), nullable=False),
sa.Column('glazing_type', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('area_m2', sa.Float(), nullable=False),
sa.Column('roof_window', sa.Boolean(), nullable=False),
sa.Column('orientation', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('u_value_w_m2_k', sa.Integer(), nullable=False),
sa.Column('g_value', sa.Integer(), nullable=False),
sa.Column('property_detail_id', sa.Uuid(), nullable=True),
sa.ForeignKeyConstraint(['property_detail_id'], ['propertydetail.id'], ),
sa.PrimaryKeyConstraint('id')
)
op.create_table('presitenote',
sa.Column('id', sa.Uuid(), nullable=False),
sa.Column('summary_info_id', sa.Uuid(), nullable=False),
sa.Column('assessor_id', sa.Uuid(), nullable=False),
sa.Column('pre_site_note_description_id', sa.Uuid(), nullable=True),
sa.ForeignKeyConstraint(['assessor_id'], ['assessorinfo.id'], ),
sa.ForeignKeyConstraint(['pre_site_note_description_id'], ['propertydescription.id'], ),
sa.ForeignKeyConstraint(['summary_info_id'], ['presitenotessummaryinfo.id'], ),
sa.PrimaryKeyConstraint('id')
)
# ### end Alembic commands ###
def downgrade() -> None:
"""Downgrade schema."""
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table('presitenote')
op.drop_table('windows')
op.drop_table('propertydescription')
op.drop_table('documents')
op.drop_table('dimension')
op.drop_table('propertydetail')
op.drop_table('heating')
op.drop_table('assessorinfo')
op.drop_table('windturbine')
op.drop_table('waterheating')
op.drop_table('walls')
op.drop_table('ventilationandcooling')
op.drop_table('solarwaterheating')
op.drop_table('showerandbaths')
op.drop_table('roofs')
op.drop_table('presitenotessummaryinfo')
op.drop_table('photovoltaicpanel')
op.drop_table('otherdetails')
op.drop_table('lighting')
op.drop_table('insulation')
op.drop_table('hotwatercylinder')
op.drop_table('heatingtype')
op.drop_table('heatingsystemcontrols')
op.drop_table('fluegasheatrecoverysystem')
op.drop_table('floors')
op.drop_table('door')
op.drop_table('companyinfo')
op.drop_table('buildings')
# ### end Alembic commands ###

View file

@ -3,6 +3,8 @@ from etl.surveyPrice.surveyPrice import SurveyPrice
from etl.surveyedData.surveryedData import surveyedDataProcessor
from etl.scraper.scraper import SharePointScraper, SharePointInstaller
from etl.db.db import get_db_session, init_db
import pandas as pd
from etl.db.db import get_db_session, init_db
from urllib.parse import unquote
@ -13,10 +15,10 @@ class HubspotTodb():
self.hubspot = HubSpotClient()
self.deals_in_hubspot = None
self.data_in_sharepoint = []
self.sp = SurveyPrice()
def get_all_deals(self):
sp = SurveyPrice()
self.deals_in_hubspot = sp.get_all_surveys_from_hubspot()
self.deals_in_hubspot = self.sp.get_all_surveys_from_hubspot()
return self.deals_in_hubspot
def get_sharepoint_path(self, url):
@ -72,6 +74,39 @@ class HubspotTodb():
each_file.append(file_path)
address_paths.update({address: each_file})
return address_paths
def string_to_installer(self, installer):
if installer.upper() == "J & J CRUMP":
return SharePointInstaller.JJC
elif installer.upper() == "SCIS":
return SharePointInstaller.SOUTH_COAST_INSULATION
else:
return None
def work_out_invoice(self, row):
survey = self.gather_data_from_sharepoint_url(row)
installer = self.string_to_installer(row["HUBSPOT_INSTALLER"])
survey_pd = pd.DataFrame([self.sp.survey_to_pandas_format(surveyInfo=survey, installer=installer)])
hubspot_data = pd.DataFrame([row])
merged_df = self.sp.merge_hub_spot_and_survey_information_from_sharepoint_url(hubspot_data, survey_pd)
return self.sp.calculate_all_price(merged_df)
# self.sp.calculate_one_price_with_sharepoint_url(row, )
def gather_data_from_sharepoint_url(self, row):
sp = self.get_sharepoint_scraper(row["HUBSPOT_INSTALLER"])
path = self.get_sharepoint_path(row["HUBSPOT_SHAREPOINT_PATH"])
data_loc = self.create_files_locally(sp, path, row["HUBSPOT_DEAL_ADDRESS"])
for add, file_loc in data_loc.items():
sdp = surveyedDataProcessor(add, file_loc)
sdp.hubspot_deal_id = row["HUBSPOT_DEAL_ID"]
with get_db_session() as session:
self.load_one_pre_site_note(session, sdp, row)
return sdp
def gather_data_from_each_sharepoint(self):
self.get_all_deals()
@ -90,29 +125,31 @@ class HubspotTodb():
if fast is False:
self.gather_data_from_each_sharepoint()
with get_db_session() as session:
self.load_pre_site_note(session)
self.load_all_pre_site_note(session)
session.commit()
def load_pre_site_note(self, db_session):
def load_one_pre_site_note(self, db_session, surveyedData, hubspot_data):
df = hubspot_data
assessor = surveyedData.load_assessor_table(db_session)
# Loads the pre site summary information
summary_info = surveyedData.load_pre_site_notes_summary_table(db_session)
property_description = surveyedData.load_property_description(db_session)
# Creates the a final pre site note table that links all information
presitenote = surveyedData.create_pre_site_note_table(db_session, assessor, summary_info, property_description)
building_table = surveyedData.create_buildings_table(
db_session,
df["HUBSPOT_LANDLORD_ID"],
df["HUBSPOT_DOMNA_ID"],
)
documents = surveyedData.create_document_table_via_pre_site_note(db_session, presitenote, assessor, building_table)
def load_all_pre_site_note(self, db_session):
# Loads all pre
for surveyedData in self.data_in_sharepoint:
# Loads Assessor information and Company information to db
assessor = surveyedData.load_assessor_table(db_session)
# Loads the pre site summary information
summary_info = surveyedData.load_pre_site_notes_summary_table(db_session)
property_description = surveyedData.load_property_description(db_session)
# Creates the a final pre site note table that links all information
presitenote = surveyedData.create_pre_site_note_table(db_session, assessor, summary_info, property_description)
df = self.deals_in_hubspot
df = df[df["HUBSPOT_DEAL_ID"] == str(surveyedData.hubspot_deal_id)]
building_table = surveyedData.create_buildings_table(
db_session,
df["HUBSPOT_LANDLORD_ID"].values[0],
df["HUBSPOT_DOMNA_ID"].values[0],
)
documents = surveyedData.create_document_table_via_pre_site_note(db_session, presitenote, assessor, building_table)
# Create building table or find building table to add new pre_site_note
self.load_one_pre_site_note(surveyedData=surveyedData, db_session=db_session)

View file

@ -4,6 +4,9 @@ from hubspot.crm.deals import PublicObjectSearchRequest
from hubspot.crm.deals.models import SimplePublicObjectInput
from etl.hubSpotClient.types import SubmissionInfoFromDeal
import time
from pydantic import ValidationError
from etl.utils.logger import Logger
import logging
@ -12,11 +15,13 @@ class DealStage(Enum):
SURVEYED_NO_ACCESS_NEED_SIGN_OFF = "1617223915"
CUSTOMER_CONTACTED = "888730834"
SURVEYED_COMPLETED_SIGNED_OFF = "1617223916"
NEEDS_ADDITIONAL_INFORMATION_FROM_ASSESSOR = "1887736000"
class HubSpotClient():
def __init__(self):
self.access_token = "pat-eu1-064f7f5c-a7d8-4d93-a9b2-b604da6164a6"
self.client = hubspot.Client.create(access_token=self.access_token)
self.logger = Logger(name='HubSpotClient', level=logging.INFO).get_logger()
def get_all_deals(self):
return self.client.crm.deals.get_all()
@ -199,20 +204,29 @@ class HubSpotClient():
all_deals = []
for deal in found_deals:
domna_id, landlord_id, uprn = self.get_domna_and_landlord_id(deal.id)
all_deals.append(SubmissionInfoFromDeal(
deal_id= deal.properties["hs_object_id"],
deal_name=deal.properties["dealname"],
work_type=deal.properties["work_type"],
needs_trickle_ventilation=True if deal.properties.get("property_needs_trickle_vents", "NO").upper() == "YES" else False,
post_sap_score=int(deal.properties["domna_survey_post_sap"]),
existing_wall_insulation=deal.properties.get("existing_wall_insulation") if deal.properties.get("existing_wall_insulation") else "None",
no_of_wet_rooms=int(deal.properties["number_of_wet_rooms_needing_ventilation"]),
installer=deal.properties["installer"],
submission_folder_path = deal.properties["submission_folder"],
landlord_id = landlord_id,
domna_id = domna_id,
uprn = uprn,
))
try:
all_deals.append(SubmissionInfoFromDeal(
deal_id= deal.properties["hs_object_id"],
deal_name=deal.properties["dealname"],
work_type=deal.properties["work_type"],
needs_trickle_ventilation=True if deal.properties.get("property_needs_trickle_vents", "NO").upper() == "YES" else False,
post_sap_score=int(deal.properties["domna_survey_post_sap"]),
existing_wall_insulation=deal.properties.get("existing_wall_insulation") if deal.properties.get("existing_wall_insulation") else "None",
no_of_wet_rooms=int(deal.properties["number_of_wet_rooms_needing_ventilation"]),
installer=deal.properties["installer"],
submission_folder_path = deal.properties["submission_folder"],
landlord_id = landlord_id,
domna_id = domna_id,
uprn = uprn,
))
except Exception as e:
deal_id = deal.properties['hs_object_id']
self.logger.info(f"Deal <{deal_id}> not valid")
self.move_deals_to_different_stage([deal_id], DealStage.NEEDS_ADDITIONAL_INFORMATION_FROM_ASSESSOR.value)
return all_deals
@ -235,4 +249,4 @@ class HubSpotClient():
deal_id,
simple_public_object_input=deal_properties
)
print(f"Deal {deal_id} moved to stage with ID {to_stage_id}.")
self.logger.info(f"Deal {deal_id} moved to stage with ID {to_stage_id}.")

View file

@ -2,6 +2,7 @@ from sqlmodel import Field, SQLModel
from sqlalchemy import Column
from sqlalchemy.dialects.postgresql import UUID
import uuid
from pydantic import Field, field_validator, ValidationError
class BaseModel(SQLModel):
id: uuid.UUID = Field(
@ -11,15 +12,22 @@ class BaseModel(SQLModel):
class SubmissionInfoFromDeal(BaseModel):
deal_id: str
deal_name: str
work_type: str
deal_id: str = Field(..., min_length=1)
deal_name: str = Field(..., min_length=1)
work_type: str = Field(..., min_length=1)
needs_trickle_ventilation: bool
post_sap_score: int
existing_wall_insulation: str
existing_wall_insulation: str = Field(..., min_length=1)
no_of_wet_rooms: int
installer: str
submission_folder_path: str
landlord_id: str
domna_id: str
uprn: str
installer: str = Field(..., min_length=1)
submission_folder_path: str = Field(..., min_length=1)
landlord_id: str = Field(..., min_length=1)
domna_id: str = Field(..., min_length=1)
uprn: str = Field(..., min_length=1)
@field_validator('post_sap_score', 'no_of_wet_rooms')
@classmethod
def must_be_non_negative(cls, v):
if v < 0:
raise ValidationError("Must be non-negative for Post Sap Score")
return v

View file

@ -0,0 +1,49 @@
import os
import pandas as pd
os.environ["SHAREPOINT_CLIENT_ID"] = "895e3b77-b1d7-43ec-b18f-dcfe07cdfeaf"
os.environ["SHAREPOINT_CLIENT_SECRET"] = "SOf8Q~-is4wdQiqvEEm9FlJQRAY9ELGaj5Qz-a6E"
os.environ["SHAREPOINT_TENANT_ID"] = "c3f7519c-2719-4547-af04-6da6cbfd8f8f"
os.environ["SOUTH_COAST_INSULATION_SERVICE_SHAREPOINT_ID"] = "b5a51507-9427-4ee0-b03e-90ec7681e2d3"
os.environ["JJC_SERVICE_SHAREPOINT_ID"] = "7fdd0485-bbf3-4b29-b30f-98c81c2a6284"
# Local development
# os.environ["DATABASE_URL"] = "postgresql://postgres:makingwarmhomes@db:5432/postgres"
from etl.surveyPrice.surveyPrice import SurveyPrice
from etl.db.hubSpotLoad import HubspotTodb
# Load to db
dbLoader = HubspotTodb()
df = dbLoader.get_all_deals()
deal_ids = df["HUBSPOT_DEAL_ID"].tolist()
panda_final = []
for index, rows in df.iterrows():
invoice_row = dbLoader.work_out_invoice(rows)
panda_final.append(invoice_row)
panda_final = pd.concat(panda_final, ignore_index=True)
df = panda_final
sp = SurveyPrice()
verbose_file = "verbose_invoice_score.xlsx"
df.to_excel(verbose_file, index=False)
output_path = os.path.abspath(verbose_file)
sp.upload_to_sharepoint(output_path, verbose_file)
lewis_view = "FOR_LEWIS.xlsx"
selected_columns = ["HUBSPOT_INSTALLER", "HUBSPOT_DEAL_ADDRESS", "PRICE"]
minimal_df = df[selected_columns]
minimal_df.to_excel(lewis_view, index=False)
output_path = os.path.abspath(lewis_view)
sp.upload_to_sharepoint(output_path, lewis_view)
sp.upload_to_sharepoint(dbLoader.sp.get_master_rate_card_path(), "COPY_OF_RATE_CARD_USED.xlsx")
# Commented out as i don't want to sync up hubspot_to_db just yet
sp.move_deals_to_completed(deal_ids)

View file

@ -0,0 +1,135 @@
from monday import MondayClient
import json
import requests
import time
from tqdm import tqdm
import os
from etl.scraper.scraper import SharePointInstaller
from etl.scraper.scraper import SharePointScraper
board_id = "4965130190"
monday_key = "eyJhbGciOiJIUzI1NiJ9.eyJ0aWQiOjQ5ODc2ODQxOCwiYWFpIjoxMSwidWlkIjozNjE3ODAzNCwiaWFkIjoiMjAyNS0wNC0xMVQxMToyMzoxNy40NjdaIiwicGVyIjoibWU6d3JpdGUiLCJhY3RpZCI6MTM5OTc4MjMsInJnbiI6InVzZTEifQ.-2Lit4s46ZF6AXuMW9t0TxIaFLkHqD4Yo-PyM9i2XZY"
monday = MondayClient(monday_key)
# osmsis keys
os.environ["SHAREPOINT_CLIENT_ID"] = "6832a4c5-fb8c-4082-a746-4f51e1020f0d"
os.environ["SHAREPOINT_CLIENT_SECRET"] = "xpC8Q~Frww48SM1V-D8lGy5iOY7P_cJ7FF3jgarQ"
os.environ["SHAREPOINT_TENANT_ID"] = "10d5af8b-2cfd-4882-9ccd-b96e4812dacf"
osmosis = SharePointScraper(SharePointInstaller.OSMOSIS_WAVE_2)
parent_folder = "/Osmosis ACD/Osmosis ACD Projects/"
# Change this per installer
parent_folder += "Stonewater/Wates"
def extract_asset_ids(item, file_column_id):
for col in item["column_values"]:
if col["id"] == file_column_id and col["value"]:
try:
value = json.loads(col["value"])
return [f["assetId"] for f in value.get("files", [])]
except Exception as e:
print(f"Error parsing file column: {e}")
return []
def get_public_url(asset_id):
url = "https://api.monday.com/v2"
headers = {
"Authorization": monday_key,
"Content-Type": "application/json"
}
query = {
"query": f"""
query {{
assets(ids: {asset_id}) {{
public_url
name
}}
}}
"""
}
response = requests.post(url, json=query, headers=headers)
response.raise_for_status()
asset = response.json()["data"]["assets"][0]
return asset["public_url"], asset["name"]
def download_file_from_public_url(public_url, filename):
local_path = os.path.join("/tmp", filename)
with requests.get(public_url, stream=True) as r:
r.raise_for_status()
with open(local_path, "wb") as f:
for chunk in r.iter_content(chunk_size=8192):
if chunk:
f.write(chunk)
return local_path
def get_all_items(board_id, monday):
# Parameters
limit = 25 # Adjust the limit based on how many items you want per request
all_items = [] # List to store all fetched items
cursor = None # Start without a cursor for the first page
# Loop through pages
while True:
# Fetch items for the current page
response = monday.boards.fetch_items_by_board_id(
board_ids=board_id,
limit=limit,
cursor=cursor
)
items = response['data']['boards'][0]['items_page']['items']
# If no items are returned, stop the loop
if not items:
break
# Append items from this page to the all_items list
all_items.extend(items)
# Get the cursor for the next page (if there is one)
cursor = response['data']['boards'][0]['items_page'].get('cursor') # Get the current cursor
# If there's no cursor, we've reached the last page
if not cursor:
break
print(f"cursor {cursor}")
print(f"len all_itemms {len(all_items)}")
return all_items
def upload_to_sharepoint(to_upload, master_folder_name):
osmosis.create_dir(master_folder_name, parent_folder)
for file_path in to_upload:
print(f"uploading {file_path}")
osmosis.upload_file(file_path, parent_folder + f"/{master_folder_name}", file_path[5:])
# Step 1: Fetch column IDs
board_data = monday.boards.fetch_boards_by_id(board_id)
columns = board_data["data"]["boards"][0]["columns"]
col_id_map = {col["title"].lower(): col["id"] for col in columns}
name_id = col_id_map.get("name") # Replace with actual title if different
files_id = col_id_map.get("file(s)") # Replace with actual title if different
if not name_id or not files_id:
raise Exception("Could not find 'name' or 'file(s)' columns")
items = get_all_items(board_id, monday)
for item in tqdm(items):
item_name = item["name"]
asset_ids = extract_asset_ids(item, files_id)
to_upload = []
for asset_id in asset_ids:
try:
public_url, file_name = get_public_url(asset_id)
print(f"Downloading {file_name} from {public_url}")
file_path = download_file_from_public_url(public_url, file_name)
to_upload.append(file_path)
except Exception as e:
print(f"Failed to download/upload asset {asset_id}: {e}")
if to_upload:
upload_to_sharepoint(to_upload, item_name)

View file

@ -14,6 +14,7 @@ class SurveyPrice():
self.master_rate_card_path = None
self.all_hubspot_submissions = None
self.all_survey_info_from_sharepoint = None
self.downloaded_price_card = False
self.required_sheets = [
@ -66,16 +67,17 @@ class SurveyPrice():
}
def download_price_card(self):
url = None
# TODO: Some sanity checks to ensure rate cards title stays consistent
for files in self.sharepoint_client.get_folders_in_path("/Commercials/Rate Cards")['value']:
if files['name'] == "MASTER RATE CARD.xlsx":
url = files['@microsoft.graph.downloadUrl']
break
if self.downloaded_price_card is False:
url = None
# TODO: Some sanity checks to ensure rate cards title stays consistent
for files in self.sharepoint_client.get_folders_in_path("/Commercials/Rate Cards")['value']:
if files['name'] == "MASTER RATE CARD.xlsx":
url = files['@microsoft.graph.downloadUrl']
break
if url:
content = self.sharepoint_client.get_file_content(url)
self.master_rate_card_path = self.sharepoint_client.create_temp_file(content, "rate_card/rate_card_all.xlsx")
if url:
content = self.sharepoint_client.get_file_content(url)
self.master_rate_card_path = self.sharepoint_client.create_temp_file(content, "rate_card/rate_card_all.xlsx")
return self.master_rate_card_path
@ -169,19 +171,7 @@ class SurveyPrice():
self.all_survey_info_from_sharepoint = pd.concat([jjc_pd, scis_pd], ignore_index=True)
return self.all_survey_info_from_sharepoint
def sharepoint_data_for_installer(self, installer):
sp = SharePointScraper(installer)
file_paths = sp.download_file_for_each_address()
surveys = []
for eachAddress in tqdm(file_paths):
for address, files in eachAddress.items():
surveys.append(surveyedDataProcessor(address, files))
all_survey_info = []
for surveyInfo in surveys:
def survey_to_pandas_format(self, surveyInfo, installer):
cavity_wall_as_built = False
csr = False
foam_insulation = False
@ -240,16 +230,28 @@ class SurveyPrice():
"DOMNA JOB TYPE": "ECO4 PV"
})
return info
def sharepoint_data_for_installer(self, installer):
sp = SharePointScraper(installer)
file_paths = sp.download_file_for_each_address()
surveys = []
for eachAddress in tqdm(file_paths):
for address, files in eachAddress.items():
surveys.append(surveyedDataProcessor(address, files))
all_survey_info = []
for surveyInfo in surveys:
info = self.survey_to_pandas_format(surveyInfo)
all_survey_info.append(info)
return pd.DataFrame(all_survey_info)
def merge_hub_spot_and_survey_information(self):
if self.all_survey_info_from_sharepoint is None:
raise RuntimeError("No survey information found from Sharepoint")
if self.all_hubspot_submissions is None:
raise RuntimeError("No information found from Hubspot")
def merge_hub_spot_and_survey_information_from_sharepoint_url(self, hubspot_data, survey_data):
# Standardise address
def extract_start_and_postcode(addr):
@ -261,23 +263,23 @@ class SurveyPrice():
return start, postcode
# Extract start + postcode from both datasets
self.all_survey_info_from_sharepoint[['address_start', 'postcode']] = self.all_survey_info_from_sharepoint['SHAREPOINT ADDRESS'].apply(
survey_data[['address_start', 'postcode']] = survey_data['SHAREPOINT ADDRESS'].apply(
lambda x: pd.Series(extract_start_and_postcode(x))
)
self.all_hubspot_submissions[['address_start', 'postcode']] = self.all_hubspot_submissions['HUBSPOT_DEAL_ADDRESS'].apply(
hubspot_data[['address_start', 'postcode']] = hubspot_data['HUBSPOT_DEAL_ADDRESS'].apply(
lambda x: pd.Series(extract_start_and_postcode(x))
)
# re-name to installer
self.all_survey_info_from_sharepoint = self.all_survey_info_from_sharepoint.rename(
survey_data = survey_data.rename(
columns={
'SHAREPOINT FLOOR_AREA_BANDING': 'FLOOR_AREA_BANDING',
}
)
self.all_hubspot_submissions = self.all_hubspot_submissions.rename(
hubspot_data = hubspot_data.rename(
columns={
'HUBSPOT_WETROOMS': 'NO_OF_WETROOMS',
'HUBSPOT_TRICKLE_VENT': 'TRICKLE_VENT',
@ -285,8 +287,8 @@ class SurveyPrice():
)
merged_df = pd.merge(
self.all_survey_info_from_sharepoint,
self.all_hubspot_submissions,
survey_data,
hubspot_data,
on=['address_start', 'postcode'],
how='inner'
)
@ -294,8 +296,6 @@ class SurveyPrice():
# if hubspot detects
merged_df.drop(columns=['address_start', 'postcode'], inplace=True)
def compute_energy_grant(row):
pre_band_letter = row["SHAREPOINT PRE_INSTALL_SAP_SCORE_BANDING"][-1]
post_band_letter = surveyedDataProcessor.get_band(row["HUBSPOT_POST_INSTALL_SAP_SCORE"])[-1]
@ -320,12 +320,28 @@ class SurveyPrice():
merged_df["WORK TYPE"] = merged_df.apply(work_type, axis=1)
return merged_df
def calculate_all_price(self):
def merge_hub_spot_and_survey_information(self):
if self.all_survey_info_from_sharepoint is None:
raise RuntimeError("No survey information found from Sharepoint")
if self.all_hubspot_submissions is None:
raise RuntimeError("No information found from Hubspot")
merged_df = self.merge_hub_spot_and_survey_information_from_sharepoint_url(self.all_hubspot_submissions, self.all_survey_info_from_sharepoint)
return merged_df
def calculate_all_price(self, merged_data=None):
self.download_price_card()
self.get_all_surveys_from_hubspot()
self.get_all_surveyed_data_from_sharepoint()
submission_data = self.merge_hub_spot_and_survey_information()
if merged_data is None:
self.get_all_surveys_from_hubspot()
self.get_all_surveyed_data_from_sharepoint()
submission_data = self.merge_hub_spot_and_survey_information()
else:
submission_data = merged_data
final_list = []
for _, row in submission_data.iterrows():
if "PV" in row["HUBSPOT_WORK_TYPE"].upper():

View file

@ -3,21 +3,18 @@ import os
class Logger:
def __init__(self, name, level=logging.INFO):
# Create a custom logger
self.logger = logging.getLogger(name)
self.logger.setLevel(level)
# Create handlers
c_handler = logging.StreamHandler()
c_handler.setLevel(level)
# ✅ Prevent adding multiple handlers
if not self.logger.handlers:
c_handler = logging.StreamHandler()
c_handler.setLevel(level)
# Create formatters and add it to handlers
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
c_handler.setFormatter(formatter)
# Add handlers to the logger
self.logger.addHandler(c_handler)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
c_handler.setFormatter(formatter)
self.logger.addHandler(c_handler)
def get_logger(self):
return self.logger