diff --git a/.github/workflows/hubspot_to_invoice.yml b/.github/workflows/hubspot_to_invoice.yml index 937c7fe..f2c78c8 100644 --- a/.github/workflows/hubspot_to_invoice.yml +++ b/.github/workflows/hubspot_to_invoice.yml @@ -5,7 +5,7 @@ on: workflow_dispatch: jobs: - sharepoint-validator: + hubspot-deals-to-db: runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v3 @@ -24,7 +24,7 @@ jobs: run: | pwd ls -la - poetry run python etl/hubspot_to_invoice.py + poetry run python etl/hubspot_to_invoice_rewrite.py env: PYTHONPATH: ${{ github.workspace }} DATABASE_URL: postgresql://postgres:makingwarmhomes@terraform-20250331175522503500000002.cdgzupxvdyp0.eu-west-2.rds.amazonaws.com:5432/surveyDB diff --git a/alembic/versions/427e65da69c1_initial_table.py b/alembic/versions/427e65da69c1_initial_table.py new file mode 100644 index 0000000..328f3bf --- /dev/null +++ b/alembic/versions/427e65da69c1_initial_table.py @@ -0,0 +1,354 @@ +"""Initial table + +Revision ID: 427e65da69c1 +Revises: +Create Date: 2025-05-14 15:36:08.611971 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +import sqlmodel + + +# revision identifiers, used by Alembic. +revision: str = '427e65da69c1' +down_revision: Union[str, None] = None +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + # ### commands auto generated by Alembic - please adjust! ### + op.create_table('buildings', + sa.Column('id', sa.Uuid(), nullable=False), + sa.Column('address', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('postcode', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('UPRN', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('landlord_id', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('domna_id', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('companyinfo', + sa.Column('id', sa.Uuid(), nullable=False), + sa.Column('address', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('trading_name', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('post_code', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('fax_number', sqlmodel.sql.sqltypes.AutoString(), nullable=True), + sa.Column('related_party_disclosure', sqlmodel.sql.sqltypes.AutoString(), nullable=True), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('door', + sa.Column('id', sa.Uuid(), nullable=False), + sa.Column('no_of_doors', sa.Integer(), nullable=False), + sa.Column('no_of_insulated_doors', sa.Integer(), nullable=False), + sa.Column('u_value_w_m2_k', sqlmodel.sql.sqltypes.AutoString(), nullable=True), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('floors', + sa.Column('id', sa.Uuid(), nullable=False), + sa.Column('floor_type', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('ground_floor_construction', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('ground_floor_insulation_type', sqlmodel.sql.sqltypes.AutoString(), nullable=True), + sa.Column('floor_insulation_thickness_mm', sa.Float(), nullable=True), + sa.Column('u_value_known', sa.Boolean(), nullable=False), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('fluegasheatrecoverysystem', + sa.Column('id', sa.Uuid(), nullable=False), + sa.Column('fghrs_present', sa.Boolean(), nullable=False), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('heatingsystemcontrols', + sa.Column('id', sa.Uuid(), nullable=False), + sa.Column('control_type', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('flue_type', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('fan_assisted_flue', sa.Boolean(), nullable=False), + sa.Column('heat_emitter_type', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('electricity_meter_type', sqlmodel.sql.sqltypes.AutoString(), nullable=True), + sa.Column('mains_gas_available', sa.Boolean(), nullable=True), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('heatingtype', + sa.Column('id', sa.Uuid(), nullable=False), + sa.Column('heating_type', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('fuel_type', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('hotwatercylinder', + sa.Column('id', sa.Uuid(), nullable=False), + sa.Column('volume', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('insulation_type', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('insulation_thickness', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('thermostat', sa.Boolean(), nullable=False), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('insulation', + sa.Column('id', sa.Uuid(), nullable=False), + sa.Column('type', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('lighting', + sa.Column('id', sa.Uuid(), nullable=False), + sa.Column('total_no_of_light_fittings', sa.Integer(), nullable=False), + sa.Column('total_no_of_lel_fittings', sa.Integer(), nullable=False), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('otherdetails', + sa.Column('id', sa.Uuid(), nullable=False), + sa.Column('electricity_meter_type', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('main_gas_avalible', sa.Boolean(), nullable=False), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('photovoltaicpanel', + sa.Column('id', sa.Uuid(), nullable=False), + sa.Column('pvs_are_connected_to_dwelling_electricity_meter', sa.Boolean(), nullable=False), + sa.Column('percentage_of_external_roof_area_with_pvs', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('presitenotessummaryinfo', + sa.Column('id', sa.Uuid(), nullable=False), + sa.Column('reference_number', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('epc_language', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('uprn', sqlmodel.sql.sqltypes.AutoString(), nullable=True), + sa.Column('postcode', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('region', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('address', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('town', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('county', sqlmodel.sql.sqltypes.AutoString(), nullable=True), + sa.Column('property_tenure', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('transaction_type', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('inspection_date', sa.DateTime(), nullable=False), + sa.Column('current_sap', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('potential_sap', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('current_ei', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('potential_ei', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('current_annual_emissions', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('current_annual_emission_including_0925_multiplayer', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('current_annual_energy_costs', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('roofs', + sa.Column('id', sa.Uuid(), nullable=False), + sa.Column('construction', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('insulation_type', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('insulation_thickness', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('u_value_known', sa.Boolean(), nullable=False), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('showerandbaths', + sa.Column('id', sa.Uuid(), nullable=False), + sa.Column('no_of_rooms_with_baths_and_or_shower', sa.Integer(), nullable=False), + sa.Column('no_of_rooms_with_mixer_shower_and_no_baths', sa.Integer(), nullable=False), + sa.Column('no_of_rooms_with_mixer_shower_and_baths', sa.Integer(), nullable=False), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('solarwaterheating', + sa.Column('id', sa.Uuid(), nullable=False), + sa.Column('solar_water_heating_details_known', sa.Boolean(), nullable=False), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('ventilationandcooling', + sa.Column('id', sa.Uuid(), nullable=False), + sa.Column('no_of_open_fireplaces', sa.Integer(), nullable=False), + sa.Column('ventilation_type', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('space_cooling_system_present', sa.Boolean(), nullable=False), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('walls', + sa.Column('id', sa.Uuid(), nullable=False), + sa.Column('construction', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('insulation', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('insulation_thickness_mm', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('wall_thickness_measured', sa.Boolean(), nullable=False), + sa.Column('wall_thickness_mm', sa.Integer(), nullable=True), + sa.Column('u_value_known', sa.Boolean(), nullable=False), + sa.Column('u_value_w_m2_k', sa.Float(), nullable=True), + sa.Column('dry_lining', sa.Boolean(), nullable=False), + sa.Column('alternative_wall_present', sa.Boolean(), nullable=False), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('waterheating', + sa.Column('id', sa.Uuid(), nullable=False), + sa.Column('heating_type', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('fuel_type', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('windturbine', + sa.Column('id', sa.Uuid(), nullable=False), + sa.Column('wind_turbine', sa.Boolean(), nullable=False), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('assessorinfo', + sa.Column('id', sa.Uuid(), nullable=False), + sa.Column('accreditation_number', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('name', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('phone_number', sqlmodel.sql.sqltypes.AutoString(), nullable=True), + sa.Column('email_address', sqlmodel.sql.sqltypes.AutoString(), nullable=True), + sa.Column('company_id', sa.Uuid(), nullable=True), + sa.ForeignKeyConstraint(['company_id'], ['companyinfo.id'], ), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('heating', + sa.Column('id', sa.Uuid(), nullable=False), + sa.Column('type', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('heating_source', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('efficiency_source', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('heating_fuel', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('brand_name', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('model_name', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('model_qualifer', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('sap_2009_table', sqlmodel.sql.sqltypes.AutoString(), nullable=True), + sa.Column('percentage_of_heated_floor_area_served', sqlmodel.sql.sqltypes.AutoString(), nullable=True), + sa.Column('controls_id', sa.Uuid(), nullable=True), + sa.ForeignKeyConstraint(['controls_id'], ['heatingsystemcontrols.id'], ), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('propertydetail', + sa.Column('id', sa.Uuid(), nullable=False), + sa.Column('age_band', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('wall_id', sa.Uuid(), nullable=True), + sa.Column('roof_id', sa.Uuid(), nullable=True), + sa.Column('floor_id', sa.Uuid(), nullable=True), + sa.ForeignKeyConstraint(['floor_id'], ['floors.id'], ), + sa.ForeignKeyConstraint(['roof_id'], ['roofs.id'], ), + sa.ForeignKeyConstraint(['wall_id'], ['walls.id'], ), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('dimension', + sa.Column('id', sa.Uuid(), nullable=False), + sa.Column('floor_area_m2', sa.Float(), nullable=False), + sa.Column('room_height_m', sa.Float(), nullable=False), + sa.Column('loss_perimeter_m', sa.Float(), nullable=False), + sa.Column('party_wall_length_m', sa.Float(), nullable=False), + sa.Column('property_detail_id', sa.Uuid(), nullable=True), + sa.ForeignKeyConstraint(['property_detail_id'], ['propertydetail.id'], ), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('documents', + sa.Column('id', sa.Uuid(), nullable=False), + sa.Column('assessor_id', sa.Uuid(), nullable=False), + sa.Column('created_at', sa.DateTime(), nullable=False), + sa.Column('document_type', sa.Enum('QUIDOS_PRESITE_NOTE', 'CHARTED_SURVEYOR_REPORT', 'ENERGY_PERFORMANCE_REPORT', 'U_VALUE_CALCULATOR_REPORT', 'OVERWRITING_U_VALUE_DECLARATION_FORM', name='reporttype'), nullable=False), + sa.Column('building_id', sa.Uuid(), nullable=False), + sa.Column('target_table', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('target_id', sa.Uuid(), nullable=False), + sa.ForeignKeyConstraint(['assessor_id'], ['assessorinfo.id'], ), + sa.ForeignKeyConstraint(['building_id'], ['buildings.id'], ), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('propertydescription', + sa.Column('id', sa.Uuid(), nullable=False), + sa.Column('built_form', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('detachment_or_position', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('no_of_main_property', sa.Integer(), nullable=False), + sa.Column('no_of_extension_1', sa.Integer(), nullable=True), + sa.Column('no_of_extension_2', sa.Integer(), nullable=True), + sa.Column('no_of_extension_3', sa.Integer(), nullable=True), + sa.Column('no_of_extension_4', sa.Integer(), nullable=True), + sa.Column('no_of_habitable_rooms', sa.Integer(), nullable=False), + sa.Column('no_of_heated_rooms', sa.Integer(), nullable=False), + sa.Column('heated_basement', sa.Boolean(), nullable=False), + sa.Column('conservatory_type', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('percentage_of_draught_proofed', sa.Integer(), nullable=False), + sa.Column('terrain_type', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('conservatory', sa.Boolean(), nullable=False), + sa.Column('main_property_id', sa.Uuid(), nullable=False), + sa.Column('ex1_property_id', sa.Uuid(), nullable=True), + sa.Column('ex2_property_id', sa.Uuid(), nullable=True), + sa.Column('ex3_property_id', sa.Uuid(), nullable=True), + sa.Column('ex4_property_id', sa.Uuid(), nullable=True), + sa.Column('door_id', sa.Uuid(), nullable=True), + sa.Column('ventilation_and_cooling_id', sa.Uuid(), nullable=True), + sa.Column('lighting_id', sa.Uuid(), nullable=True), + sa.Column('water_heating_id', sa.Uuid(), nullable=True), + sa.Column('hot_water_cylinder_id', sa.Uuid(), nullable=True), + sa.Column('solar_water_heating_id', sa.Uuid(), nullable=True), + sa.Column('shower_and_baths_id', sa.Uuid(), nullable=True), + sa.Column('flue_gas_heat_recovery_system_id', sa.Uuid(), nullable=True), + sa.Column('photovoltaic_panel_id', sa.Uuid(), nullable=True), + sa.Column('wind_turbine_id', sa.Uuid(), nullable=True), + sa.Column('other_details_id', sa.Uuid(), nullable=True), + sa.Column('main_heating_id', sa.Uuid(), nullable=True), + sa.Column('main_heating2_id', sa.Uuid(), nullable=True), + sa.Column('secondary_heating_type_id', sa.Uuid(), nullable=True), + sa.ForeignKeyConstraint(['door_id'], ['door.id'], ), + sa.ForeignKeyConstraint(['ex1_property_id'], ['propertydetail.id'], ), + sa.ForeignKeyConstraint(['ex2_property_id'], ['propertydetail.id'], ), + sa.ForeignKeyConstraint(['ex3_property_id'], ['propertydetail.id'], ), + sa.ForeignKeyConstraint(['ex4_property_id'], ['propertydetail.id'], ), + sa.ForeignKeyConstraint(['flue_gas_heat_recovery_system_id'], ['fluegasheatrecoverysystem.id'], ), + sa.ForeignKeyConstraint(['hot_water_cylinder_id'], ['hotwatercylinder.id'], ), + sa.ForeignKeyConstraint(['lighting_id'], ['lighting.id'], ), + sa.ForeignKeyConstraint(['main_heating2_id'], ['heating.id'], ), + sa.ForeignKeyConstraint(['main_heating_id'], ['heating.id'], ), + sa.ForeignKeyConstraint(['main_property_id'], ['propertydetail.id'], ), + sa.ForeignKeyConstraint(['other_details_id'], ['otherdetails.id'], ), + sa.ForeignKeyConstraint(['photovoltaic_panel_id'], ['photovoltaicpanel.id'], ), + sa.ForeignKeyConstraint(['secondary_heating_type_id'], ['heatingtype.id'], ), + sa.ForeignKeyConstraint(['shower_and_baths_id'], ['showerandbaths.id'], ), + sa.ForeignKeyConstraint(['solar_water_heating_id'], ['solarwaterheating.id'], ), + sa.ForeignKeyConstraint(['ventilation_and_cooling_id'], ['ventilationandcooling.id'], ), + sa.ForeignKeyConstraint(['water_heating_id'], ['waterheating.id'], ), + sa.ForeignKeyConstraint(['wind_turbine_id'], ['windturbine.id'], ), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('windows', + sa.Column('id', sa.Uuid(), nullable=False), + sa.Column('glazing_type', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('area_m2', sa.Float(), nullable=False), + sa.Column('roof_window', sa.Boolean(), nullable=False), + sa.Column('orientation', sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column('u_value_w_m2_k', sa.Integer(), nullable=False), + sa.Column('g_value', sa.Integer(), nullable=False), + sa.Column('property_detail_id', sa.Uuid(), nullable=True), + sa.ForeignKeyConstraint(['property_detail_id'], ['propertydetail.id'], ), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('presitenote', + sa.Column('id', sa.Uuid(), nullable=False), + sa.Column('summary_info_id', sa.Uuid(), nullable=False), + sa.Column('assessor_id', sa.Uuid(), nullable=False), + sa.Column('pre_site_note_description_id', sa.Uuid(), nullable=True), + sa.ForeignKeyConstraint(['assessor_id'], ['assessorinfo.id'], ), + sa.ForeignKeyConstraint(['pre_site_note_description_id'], ['propertydescription.id'], ), + sa.ForeignKeyConstraint(['summary_info_id'], ['presitenotessummaryinfo.id'], ), + sa.PrimaryKeyConstraint('id') + ) + # ### end Alembic commands ### + + +def downgrade() -> None: + """Downgrade schema.""" + # ### commands auto generated by Alembic - please adjust! ### + op.drop_table('presitenote') + op.drop_table('windows') + op.drop_table('propertydescription') + op.drop_table('documents') + op.drop_table('dimension') + op.drop_table('propertydetail') + op.drop_table('heating') + op.drop_table('assessorinfo') + op.drop_table('windturbine') + op.drop_table('waterheating') + op.drop_table('walls') + op.drop_table('ventilationandcooling') + op.drop_table('solarwaterheating') + op.drop_table('showerandbaths') + op.drop_table('roofs') + op.drop_table('presitenotessummaryinfo') + op.drop_table('photovoltaicpanel') + op.drop_table('otherdetails') + op.drop_table('lighting') + op.drop_table('insulation') + op.drop_table('hotwatercylinder') + op.drop_table('heatingtype') + op.drop_table('heatingsystemcontrols') + op.drop_table('fluegasheatrecoverysystem') + op.drop_table('floors') + op.drop_table('door') + op.drop_table('companyinfo') + op.drop_table('buildings') + # ### end Alembic commands ### diff --git a/etl/db/hubSpotLoad.py b/etl/db/hubSpotLoad.py index a893245..f4989a0 100644 --- a/etl/db/hubSpotLoad.py +++ b/etl/db/hubSpotLoad.py @@ -3,6 +3,8 @@ from etl.surveyPrice.surveyPrice import SurveyPrice from etl.surveyedData.surveryedData import surveyedDataProcessor from etl.scraper.scraper import SharePointScraper, SharePointInstaller from etl.db.db import get_db_session, init_db +import pandas as pd +from etl.db.db import get_db_session, init_db from urllib.parse import unquote @@ -13,10 +15,10 @@ class HubspotTodb(): self.hubspot = HubSpotClient() self.deals_in_hubspot = None self.data_in_sharepoint = [] + self.sp = SurveyPrice() def get_all_deals(self): - sp = SurveyPrice() - self.deals_in_hubspot = sp.get_all_surveys_from_hubspot() + self.deals_in_hubspot = self.sp.get_all_surveys_from_hubspot() return self.deals_in_hubspot def get_sharepoint_path(self, url): @@ -72,6 +74,39 @@ class HubspotTodb(): each_file.append(file_path) address_paths.update({address: each_file}) return address_paths + + def string_to_installer(self, installer): + if installer.upper() == "J & J CRUMP": + return SharePointInstaller.JJC + elif installer.upper() == "SCIS": + return SharePointInstaller.SOUTH_COAST_INSULATION + else: + return None + + def work_out_invoice(self, row): + survey = self.gather_data_from_sharepoint_url(row) + installer = self.string_to_installer(row["HUBSPOT_INSTALLER"]) + survey_pd = pd.DataFrame([self.sp.survey_to_pandas_format(surveyInfo=survey, installer=installer)]) + hubspot_data = pd.DataFrame([row]) + merged_df = self.sp.merge_hub_spot_and_survey_information_from_sharepoint_url(hubspot_data, survey_pd) + return self.sp.calculate_all_price(merged_df) + + + + # self.sp.calculate_one_price_with_sharepoint_url(row, ) + + def gather_data_from_sharepoint_url(self, row): + sp = self.get_sharepoint_scraper(row["HUBSPOT_INSTALLER"]) + path = self.get_sharepoint_path(row["HUBSPOT_SHAREPOINT_PATH"]) + data_loc = self.create_files_locally(sp, path, row["HUBSPOT_DEAL_ADDRESS"]) + + for add, file_loc in data_loc.items(): + sdp = surveyedDataProcessor(add, file_loc) + sdp.hubspot_deal_id = row["HUBSPOT_DEAL_ID"] + with get_db_session() as session: + self.load_one_pre_site_note(session, sdp, row) + return sdp + def gather_data_from_each_sharepoint(self): self.get_all_deals() @@ -90,29 +125,31 @@ class HubspotTodb(): if fast is False: self.gather_data_from_each_sharepoint() with get_db_session() as session: - self.load_pre_site_note(session) + self.load_all_pre_site_note(session) session.commit() - def load_pre_site_note(self, db_session): + def load_one_pre_site_note(self, db_session, surveyedData, hubspot_data): + df = hubspot_data + assessor = surveyedData.load_assessor_table(db_session) + + # Loads the pre site summary information + summary_info = surveyedData.load_pre_site_notes_summary_table(db_session) + + property_description = surveyedData.load_property_description(db_session) + + # Creates the a final pre site note table that links all information + presitenote = surveyedData.create_pre_site_note_table(db_session, assessor, summary_info, property_description) + + building_table = surveyedData.create_buildings_table( + db_session, + df["HUBSPOT_LANDLORD_ID"], + df["HUBSPOT_DOMNA_ID"], + ) + documents = surveyedData.create_document_table_via_pre_site_note(db_session, presitenote, assessor, building_table) + + def load_all_pre_site_note(self, db_session): + # Loads all pre for surveyedData in self.data_in_sharepoint: - # Loads Assessor information and Company information to db - assessor = surveyedData.load_assessor_table(db_session) - # Loads the pre site summary information - summary_info = surveyedData.load_pre_site_notes_summary_table(db_session) - - property_description = surveyedData.load_property_description(db_session) - - # Creates the a final pre site note table that links all information - presitenote = surveyedData.create_pre_site_note_table(db_session, assessor, summary_info, property_description) - - df = self.deals_in_hubspot - df = df[df["HUBSPOT_DEAL_ID"] == str(surveyedData.hubspot_deal_id)] - building_table = surveyedData.create_buildings_table( - db_session, - df["HUBSPOT_LANDLORD_ID"].values[0], - df["HUBSPOT_DOMNA_ID"].values[0], - ) - documents = surveyedData.create_document_table_via_pre_site_note(db_session, presitenote, assessor, building_table) - # Create building table or find building table to add new pre_site_note + self.load_one_pre_site_note(surveyedData=surveyedData, db_session=db_session) diff --git a/etl/hubSpotClient/hubspot.py b/etl/hubSpotClient/hubspot.py index 0866d24..a05c309 100644 --- a/etl/hubSpotClient/hubspot.py +++ b/etl/hubSpotClient/hubspot.py @@ -4,6 +4,9 @@ from hubspot.crm.deals import PublicObjectSearchRequest from hubspot.crm.deals.models import SimplePublicObjectInput from etl.hubSpotClient.types import SubmissionInfoFromDeal import time +from pydantic import ValidationError +from etl.utils.logger import Logger +import logging @@ -12,11 +15,13 @@ class DealStage(Enum): SURVEYED_NO_ACCESS_NEED_SIGN_OFF = "1617223915" CUSTOMER_CONTACTED = "888730834" SURVEYED_COMPLETED_SIGNED_OFF = "1617223916" + NEEDS_ADDITIONAL_INFORMATION_FROM_ASSESSOR = "1887736000" class HubSpotClient(): def __init__(self): self.access_token = "pat-eu1-064f7f5c-a7d8-4d93-a9b2-b604da6164a6" self.client = hubspot.Client.create(access_token=self.access_token) + self.logger = Logger(name='HubSpotClient', level=logging.INFO).get_logger() def get_all_deals(self): return self.client.crm.deals.get_all() @@ -199,20 +204,29 @@ class HubSpotClient(): all_deals = [] for deal in found_deals: domna_id, landlord_id, uprn = self.get_domna_and_landlord_id(deal.id) - all_deals.append(SubmissionInfoFromDeal( - deal_id= deal.properties["hs_object_id"], - deal_name=deal.properties["dealname"], - work_type=deal.properties["work_type"], - needs_trickle_ventilation=True if deal.properties.get("property_needs_trickle_vents", "NO").upper() == "YES" else False, - post_sap_score=int(deal.properties["domna_survey_post_sap"]), - existing_wall_insulation=deal.properties.get("existing_wall_insulation") if deal.properties.get("existing_wall_insulation") else "None", - no_of_wet_rooms=int(deal.properties["number_of_wet_rooms_needing_ventilation"]), - installer=deal.properties["installer"], - submission_folder_path = deal.properties["submission_folder"], - landlord_id = landlord_id, - domna_id = domna_id, - uprn = uprn, - )) + try: + all_deals.append(SubmissionInfoFromDeal( + deal_id= deal.properties["hs_object_id"], + deal_name=deal.properties["dealname"], + work_type=deal.properties["work_type"], + needs_trickle_ventilation=True if deal.properties.get("property_needs_trickle_vents", "NO").upper() == "YES" else False, + post_sap_score=int(deal.properties["domna_survey_post_sap"]), + existing_wall_insulation=deal.properties.get("existing_wall_insulation") if deal.properties.get("existing_wall_insulation") else "None", + no_of_wet_rooms=int(deal.properties["number_of_wet_rooms_needing_ventilation"]), + installer=deal.properties["installer"], + submission_folder_path = deal.properties["submission_folder"], + landlord_id = landlord_id, + domna_id = domna_id, + uprn = uprn, + )) + except Exception as e: + deal_id = deal.properties['hs_object_id'] + self.logger.info(f"Deal <{deal_id}> not valid") + self.move_deals_to_different_stage([deal_id], DealStage.NEEDS_ADDITIONAL_INFORMATION_FROM_ASSESSOR.value) + + + + return all_deals @@ -235,4 +249,4 @@ class HubSpotClient(): deal_id, simple_public_object_input=deal_properties ) - print(f"Deal {deal_id} moved to stage with ID {to_stage_id}.") + self.logger.info(f"Deal {deal_id} moved to stage with ID {to_stage_id}.") diff --git a/etl/hubSpotClient/types.py b/etl/hubSpotClient/types.py index f95603f..82c9cd3 100644 --- a/etl/hubSpotClient/types.py +++ b/etl/hubSpotClient/types.py @@ -2,6 +2,7 @@ from sqlmodel import Field, SQLModel from sqlalchemy import Column from sqlalchemy.dialects.postgresql import UUID import uuid +from pydantic import Field, field_validator, ValidationError class BaseModel(SQLModel): id: uuid.UUID = Field( @@ -11,15 +12,22 @@ class BaseModel(SQLModel): class SubmissionInfoFromDeal(BaseModel): - deal_id: str - deal_name: str - work_type: str + deal_id: str = Field(..., min_length=1) + deal_name: str = Field(..., min_length=1) + work_type: str = Field(..., min_length=1) needs_trickle_ventilation: bool post_sap_score: int - existing_wall_insulation: str + existing_wall_insulation: str = Field(..., min_length=1) no_of_wet_rooms: int - installer: str - submission_folder_path: str - landlord_id: str - domna_id: str - uprn: str \ No newline at end of file + installer: str = Field(..., min_length=1) + submission_folder_path: str = Field(..., min_length=1) + landlord_id: str = Field(..., min_length=1) + domna_id: str = Field(..., min_length=1) + uprn: str = Field(..., min_length=1) + + @field_validator('post_sap_score', 'no_of_wet_rooms') + @classmethod + def must_be_non_negative(cls, v): + if v < 0: + raise ValidationError("Must be non-negative for Post Sap Score") + return v \ No newline at end of file diff --git a/etl/hubspot_to_invoice_rewrite.py b/etl/hubspot_to_invoice_rewrite.py new file mode 100644 index 0000000..0a616a5 --- /dev/null +++ b/etl/hubspot_to_invoice_rewrite.py @@ -0,0 +1,49 @@ +import os +import pandas as pd +os.environ["SHAREPOINT_CLIENT_ID"] = "895e3b77-b1d7-43ec-b18f-dcfe07cdfeaf" +os.environ["SHAREPOINT_CLIENT_SECRET"] = "SOf8Q~-is4wdQiqvEEm9FlJQRAY9ELGaj5Qz-a6E" +os.environ["SHAREPOINT_TENANT_ID"] = "c3f7519c-2719-4547-af04-6da6cbfd8f8f" +os.environ["SOUTH_COAST_INSULATION_SERVICE_SHAREPOINT_ID"] = "b5a51507-9427-4ee0-b03e-90ec7681e2d3" +os.environ["JJC_SERVICE_SHAREPOINT_ID"] = "7fdd0485-bbf3-4b29-b30f-98c81c2a6284" + +# Local development +# os.environ["DATABASE_URL"] = "postgresql://postgres:makingwarmhomes@db:5432/postgres" + +from etl.surveyPrice.surveyPrice import SurveyPrice +from etl.db.hubSpotLoad import HubspotTodb + +# Load to db +dbLoader = HubspotTodb() + +df = dbLoader.get_all_deals() +deal_ids = df["HUBSPOT_DEAL_ID"].tolist() + +panda_final = [] +for index, rows in df.iterrows(): + invoice_row = dbLoader.work_out_invoice(rows) + panda_final.append(invoice_row) + +panda_final = pd.concat(panda_final, ignore_index=True) + + +df = panda_final +sp = SurveyPrice() + +verbose_file = "verbose_invoice_score.xlsx" +df.to_excel(verbose_file, index=False) +output_path = os.path.abspath(verbose_file) +sp.upload_to_sharepoint(output_path, verbose_file) + +lewis_view = "FOR_LEWIS.xlsx" +selected_columns = ["HUBSPOT_INSTALLER", "HUBSPOT_DEAL_ADDRESS", "PRICE"] +minimal_df = df[selected_columns] +minimal_df.to_excel(lewis_view, index=False) +output_path = os.path.abspath(lewis_view) +sp.upload_to_sharepoint(output_path, lewis_view) + +sp.upload_to_sharepoint(dbLoader.sp.get_master_rate_card_path(), "COPY_OF_RATE_CARD_USED.xlsx") + + +# Commented out as i don't want to sync up hubspot_to_db just yet +sp.move_deals_to_completed(deal_ids) + diff --git a/etl/osmosis_complaince_address_to_files.py b/etl/osmosis_complaince_address_to_files.py new file mode 100644 index 0000000..9f8a64c --- /dev/null +++ b/etl/osmosis_complaince_address_to_files.py @@ -0,0 +1,135 @@ +from monday import MondayClient +import json +import requests +import time +from tqdm import tqdm +import os +from etl.scraper.scraper import SharePointInstaller +from etl.scraper.scraper import SharePointScraper + +board_id = "4965130190" +monday_key = "eyJhbGciOiJIUzI1NiJ9.eyJ0aWQiOjQ5ODc2ODQxOCwiYWFpIjoxMSwidWlkIjozNjE3ODAzNCwiaWFkIjoiMjAyNS0wNC0xMVQxMToyMzoxNy40NjdaIiwicGVyIjoibWU6d3JpdGUiLCJhY3RpZCI6MTM5OTc4MjMsInJnbiI6InVzZTEifQ.-2Lit4s46ZF6AXuMW9t0TxIaFLkHqD4Yo-PyM9i2XZY" +monday = MondayClient(monday_key) + + +# osmsis keys +os.environ["SHAREPOINT_CLIENT_ID"] = "6832a4c5-fb8c-4082-a746-4f51e1020f0d" +os.environ["SHAREPOINT_CLIENT_SECRET"] = "xpC8Q~Frww48SM1V-D8lGy5iOY7P_cJ7FF3jgarQ" +os.environ["SHAREPOINT_TENANT_ID"] = "10d5af8b-2cfd-4882-9ccd-b96e4812dacf" +osmosis = SharePointScraper(SharePointInstaller.OSMOSIS_WAVE_2) +parent_folder = "/Osmosis ACD/Osmosis ACD Projects/" + +# Change this per installer +parent_folder += "Stonewater/Wates" + + +def extract_asset_ids(item, file_column_id): + for col in item["column_values"]: + if col["id"] == file_column_id and col["value"]: + try: + value = json.loads(col["value"]) + return [f["assetId"] for f in value.get("files", [])] + except Exception as e: + print(f"Error parsing file column: {e}") + return [] + +def get_public_url(asset_id): + url = "https://api.monday.com/v2" + headers = { + "Authorization": monday_key, + "Content-Type": "application/json" + } + query = { + "query": f""" + query {{ + assets(ids: {asset_id}) {{ + public_url + name + }} + }} + """ + } + response = requests.post(url, json=query, headers=headers) + response.raise_for_status() + asset = response.json()["data"]["assets"][0] + return asset["public_url"], asset["name"] + +def download_file_from_public_url(public_url, filename): + local_path = os.path.join("/tmp", filename) + with requests.get(public_url, stream=True) as r: + r.raise_for_status() + with open(local_path, "wb") as f: + for chunk in r.iter_content(chunk_size=8192): + if chunk: + f.write(chunk) + return local_path + + +def get_all_items(board_id, monday): + # Parameters + limit = 25 # Adjust the limit based on how many items you want per request + all_items = [] # List to store all fetched items + cursor = None # Start without a cursor for the first page + + # Loop through pages + while True: + # Fetch items for the current page + response = monday.boards.fetch_items_by_board_id( + board_ids=board_id, + limit=limit, + cursor=cursor + ) + + items = response['data']['boards'][0]['items_page']['items'] + + # If no items are returned, stop the loop + if not items: + break + + # Append items from this page to the all_items list + all_items.extend(items) + + # Get the cursor for the next page (if there is one) + cursor = response['data']['boards'][0]['items_page'].get('cursor') # Get the current cursor + + # If there's no cursor, we've reached the last page + if not cursor: + break + print(f"cursor {cursor}") + print(f"len all_itemms {len(all_items)}") + return all_items + +def upload_to_sharepoint(to_upload, master_folder_name): + osmosis.create_dir(master_folder_name, parent_folder) + for file_path in to_upload: + print(f"uploading {file_path}") + osmosis.upload_file(file_path, parent_folder + f"/{master_folder_name}", file_path[5:]) + +# Step 1: Fetch column IDs +board_data = monday.boards.fetch_boards_by_id(board_id) +columns = board_data["data"]["boards"][0]["columns"] +col_id_map = {col["title"].lower(): col["id"] for col in columns} + +name_id = col_id_map.get("name") # Replace with actual title if different +files_id = col_id_map.get("file(s)") # Replace with actual title if different + +if not name_id or not files_id: + raise Exception("Could not find 'name' or 'file(s)' columns") + +items = get_all_items(board_id, monday) +for item in tqdm(items): + item_name = item["name"] + asset_ids = extract_asset_ids(item, files_id) + + to_upload = [] + for asset_id in asset_ids: + try: + public_url, file_name = get_public_url(asset_id) + print(f"Downloading {file_name} from {public_url}") + file_path = download_file_from_public_url(public_url, file_name) + to_upload.append(file_path) + except Exception as e: + print(f"Failed to download/upload asset {asset_id}: {e}") + + if to_upload: + upload_to_sharepoint(to_upload, item_name) \ No newline at end of file diff --git a/etl/surveyPrice/surveyPrice.py b/etl/surveyPrice/surveyPrice.py index 408b42e..ae51422 100644 --- a/etl/surveyPrice/surveyPrice.py +++ b/etl/surveyPrice/surveyPrice.py @@ -14,6 +14,7 @@ class SurveyPrice(): self.master_rate_card_path = None self.all_hubspot_submissions = None self.all_survey_info_from_sharepoint = None + self.downloaded_price_card = False self.required_sheets = [ @@ -66,16 +67,17 @@ class SurveyPrice(): } def download_price_card(self): - url = None - # TODO: Some sanity checks to ensure rate cards title stays consistent - for files in self.sharepoint_client.get_folders_in_path("/Commercials/Rate Cards")['value']: - if files['name'] == "MASTER RATE CARD.xlsx": - url = files['@microsoft.graph.downloadUrl'] - break + if self.downloaded_price_card is False: + url = None + # TODO: Some sanity checks to ensure rate cards title stays consistent + for files in self.sharepoint_client.get_folders_in_path("/Commercials/Rate Cards")['value']: + if files['name'] == "MASTER RATE CARD.xlsx": + url = files['@microsoft.graph.downloadUrl'] + break - if url: - content = self.sharepoint_client.get_file_content(url) - self.master_rate_card_path = self.sharepoint_client.create_temp_file(content, "rate_card/rate_card_all.xlsx") + if url: + content = self.sharepoint_client.get_file_content(url) + self.master_rate_card_path = self.sharepoint_client.create_temp_file(content, "rate_card/rate_card_all.xlsx") return self.master_rate_card_path @@ -169,19 +171,7 @@ class SurveyPrice(): self.all_survey_info_from_sharepoint = pd.concat([jjc_pd, scis_pd], ignore_index=True) return self.all_survey_info_from_sharepoint - - def sharepoint_data_for_installer(self, installer): - - sp = SharePointScraper(installer) - file_paths = sp.download_file_for_each_address() - surveys = [] - - for eachAddress in tqdm(file_paths): - for address, files in eachAddress.items(): - surveys.append(surveyedDataProcessor(address, files)) - - all_survey_info = [] - for surveyInfo in surveys: + def survey_to_pandas_format(self, surveyInfo, installer): cavity_wall_as_built = False csr = False foam_insulation = False @@ -240,16 +230,28 @@ class SurveyPrice(): "DOMNA JOB TYPE": "ECO4 PV" }) + return info + + + def sharepoint_data_for_installer(self, installer): + + sp = SharePointScraper(installer) + file_paths = sp.download_file_for_each_address() + surveys = [] + + for eachAddress in tqdm(file_paths): + for address, files in eachAddress.items(): + surveys.append(surveyedDataProcessor(address, files)) + + all_survey_info = [] + for surveyInfo in surveys: + info = self.survey_to_pandas_format(surveyInfo) all_survey_info.append(info) return pd.DataFrame(all_survey_info) - def merge_hub_spot_and_survey_information(self): - if self.all_survey_info_from_sharepoint is None: - raise RuntimeError("No survey information found from Sharepoint") - if self.all_hubspot_submissions is None: - raise RuntimeError("No information found from Hubspot") + def merge_hub_spot_and_survey_information_from_sharepoint_url(self, hubspot_data, survey_data): # Standardise address def extract_start_and_postcode(addr): @@ -261,23 +263,23 @@ class SurveyPrice(): return start, postcode # Extract start + postcode from both datasets - self.all_survey_info_from_sharepoint[['address_start', 'postcode']] = self.all_survey_info_from_sharepoint['SHAREPOINT ADDRESS'].apply( + survey_data[['address_start', 'postcode']] = survey_data['SHAREPOINT ADDRESS'].apply( lambda x: pd.Series(extract_start_and_postcode(x)) ) - self.all_hubspot_submissions[['address_start', 'postcode']] = self.all_hubspot_submissions['HUBSPOT_DEAL_ADDRESS'].apply( + hubspot_data[['address_start', 'postcode']] = hubspot_data['HUBSPOT_DEAL_ADDRESS'].apply( lambda x: pd.Series(extract_start_and_postcode(x)) ) # re-name to installer - self.all_survey_info_from_sharepoint = self.all_survey_info_from_sharepoint.rename( + survey_data = survey_data.rename( columns={ 'SHAREPOINT FLOOR_AREA_BANDING': 'FLOOR_AREA_BANDING', } ) - self.all_hubspot_submissions = self.all_hubspot_submissions.rename( + hubspot_data = hubspot_data.rename( columns={ 'HUBSPOT_WETROOMS': 'NO_OF_WETROOMS', 'HUBSPOT_TRICKLE_VENT': 'TRICKLE_VENT', @@ -285,8 +287,8 @@ class SurveyPrice(): ) merged_df = pd.merge( - self.all_survey_info_from_sharepoint, - self.all_hubspot_submissions, + survey_data, + hubspot_data, on=['address_start', 'postcode'], how='inner' ) @@ -294,8 +296,6 @@ class SurveyPrice(): # if hubspot detects merged_df.drop(columns=['address_start', 'postcode'], inplace=True) - - def compute_energy_grant(row): pre_band_letter = row["SHAREPOINT PRE_INSTALL_SAP_SCORE_BANDING"][-1] post_band_letter = surveyedDataProcessor.get_band(row["HUBSPOT_POST_INSTALL_SAP_SCORE"])[-1] @@ -320,12 +320,28 @@ class SurveyPrice(): merged_df["WORK TYPE"] = merged_df.apply(work_type, axis=1) return merged_df + - def calculate_all_price(self): + + def merge_hub_spot_and_survey_information(self): + if self.all_survey_info_from_sharepoint is None: + raise RuntimeError("No survey information found from Sharepoint") + if self.all_hubspot_submissions is None: + raise RuntimeError("No information found from Hubspot") + + merged_df = self.merge_hub_spot_and_survey_information_from_sharepoint_url(self.all_hubspot_submissions, self.all_survey_info_from_sharepoint) + return merged_df + + + def calculate_all_price(self, merged_data=None): self.download_price_card() - self.get_all_surveys_from_hubspot() - self.get_all_surveyed_data_from_sharepoint() - submission_data = self.merge_hub_spot_and_survey_information() + if merged_data is None: + self.get_all_surveys_from_hubspot() + self.get_all_surveyed_data_from_sharepoint() + submission_data = self.merge_hub_spot_and_survey_information() + else: + submission_data = merged_data + final_list = [] for _, row in submission_data.iterrows(): if "PV" in row["HUBSPOT_WORK_TYPE"].upper(): diff --git a/etl/utils/logger.py b/etl/utils/logger.py index 62970c6..bb70887 100644 --- a/etl/utils/logger.py +++ b/etl/utils/logger.py @@ -3,21 +3,18 @@ import os class Logger: def __init__(self, name, level=logging.INFO): - # Create a custom logger self.logger = logging.getLogger(name) self.logger.setLevel(level) - # Create handlers - c_handler = logging.StreamHandler() - c_handler.setLevel(level) + # ✅ Prevent adding multiple handlers + if not self.logger.handlers: + c_handler = logging.StreamHandler() + c_handler.setLevel(level) - # Create formatters and add it to handlers - formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') - c_handler.setFormatter(formatter) - - # Add handlers to the logger - self.logger.addHandler(c_handler) + formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + c_handler.setFormatter(formatter) + self.logger.addHandler(c_handler) def get_logger(self): return self.logger \ No newline at end of file