surveyed sign off

This commit is contained in:
Jun-te Kim 2025-07-14 10:08:58 +00:00
parent 1e467cfd56
commit 4075fbaa3c
14 changed files with 99 additions and 20 deletions

View file

@ -1,7 +1,7 @@
name: Deal Notes From HubSpot Scraper
name: Daily Surved
on:
schedule:
- cron: '0 19 * * 0'
- cron: '0 17 * * 1-5'
workflow_dispatch:
jobs:
@ -24,6 +24,6 @@ jobs:
run: |
pwd
ls -la
poetry run python etl/dimitra_hubspot_notes_gather.py
poetry run python etl/hubspot_surveyed_needs_sign_off.py
env:
PYTHONPATH: ${{ github.workspace }}

View file

@ -1,5 +1,5 @@
import os
from pdfReader.pdfReaderToText import pdfReaderToText
from fileReader.pdfReaderToText import pdfReaderToText
from etl.scraper.scraper import SharePointScraper, SharePointInstaller, WEEK_COMMENCING
from pprint import pprint, pformat
import logging

View file

@ -1,6 +1,6 @@
from etl.scraper.scraper import SharePointScraper, SharePointInstaller
from pprint import pformat
from etl.pdfReader.pdfReaderToText import pdfReaderToText
from etl.fileReader.pdfReaderToText import pdfReaderToText
from etl.surveyedData.surveryedData import surveyedDataProcessor
import pandas as pd

View file

@ -10,3 +10,6 @@ class ReportType(Enum):
ECO_CONDITION_REPORT = "osmosis_condition_pas_2035_report"
WARM_HOMES_CONDITION_REPORT = "warm_homes_condition_pas_2035_report"
RDSAP_ENERGY_REPORT = "rdsap_energy_report"
LIG_XML = "lodgement_xml_needed_for_lodgement_to_like_trademark"
RDSAP_XML = "reduce_xml_needed_to_generate_full_sap_xml"
FULLSAP_XML = "full_xml_needed_for_co_ordination"

View file

@ -1,4 +1,4 @@
from etl.pdfReader.reportType import ReportType
from etl.fileReader.reportType import ReportType
from etl.transform.preSiteNoteTypes import (
CompanyInfo, PreSiteNotesSummaryInfo, AssessorInfo,
PropertyDescription, PropertyDetail, Dimension,

View file

@ -0,0 +1,43 @@
from etl.utils.logger import Logger
import logging
from xml.dom.minidom import parse
import os
from etl.fileReader.reportType import ReportType
class xmlReader():
    """Load an XML file from disk and classify it by its root element.

    On construction the file at ``file_path`` is parsed with
    ``xml.dom.minidom`` and the resulting document is stored in ``xml_obj``.
    The root element name (compared case-insensitively) decides ``type``:

    * ``RdSap-Report``        -> ``ReportType.LIG_XML``
    * ``SurveyRec``           -> ``ReportType.RDSAP_XML``
    * ``ImportExportRecord``  -> ``ReportType.FULLSAP_XML``

    If the file is missing, cannot be parsed, or has an unrecognised root
    element, the failure is logged and the affected attribute stays ``None``;
    no exception is propagated to the caller.
    """

    # Lower-cased root element name -> name of the matching ReportType member.
    # Member names (not members) are stored so the enum is only looked up
    # when a root element actually matches.
    _ROOT_TAG_TO_REPORT_TYPE = {
        "rdsap-report": "LIG_XML",
        "surveyrec": "RDSAP_XML",
        "importexportrecord": "FULLSAP_XML",
    }

    def __init__(self, file_path):
        """Store ``file_path`` and immediately attempt to parse it."""
        self.source_path = file_path
        self.logger = Logger(name='xmlReader', level=logging.INFO).get_logger()
        self.xml_obj = None
        self.type = None
        self.get_xml_obj()

    def get_xml_obj(self):
        """Parse ``source_path`` and return the DOM document.

        Returns:
            The parsed ``xml.dom.minidom`` document, or ``None`` when the
            file does not exist or parsing fails (the reason is logged).
        """
        # Drop any earlier result so a failed (re)load never leaves a stale
        # document or type behind.
        self.xml_obj = None
        self.type = None
        try:
            if not os.path.exists(self.source_path):
                self.logger.error(f"File not found: {self.source_path}")
                return None
            # Binary mode lets the XML parser honour the encoding declared in
            # the document itself instead of forcing UTF-8 decoding up front.
            with open(self.source_path, 'rb') as file:
                self.xml_obj = parse(file)
            self.get_type()
            return self.xml_obj
        except Exception as e:
            self.logger.error(f"Failed to parse XML file {self.source_path}: {e}")
            self.xml_obj = None
            self.type = None
            return self.xml_obj

    def get_type(self):
        """Set and return ``type`` based on the document's root element name.

        Returns:
            The matching ``ReportType`` member, or ``None`` when the root
            element is not recognised. When no document is loaded the current
            ``type`` is returned unchanged.
        """
        if self.xml_obj is None:
            return self.type
        root_tag = self.xml_obj.documentElement.tagName.lower()
        member_name = self._ROOT_TAG_TO_REPORT_TYPE.get(root_tag)
        if member_name is None:
            self.logger.warning(
                f"Unrecognised XML root element '{root_tag}' in {self.source_path}"
            )
            self.type = None
        else:
            self.type = ReportType[member_name]
        return self.type

View file

@ -71,11 +71,27 @@ class SubmissionInfoFromDeal(BaseModel):
# download files in url and check files are there:
try:
files = sp.download_files_from_path(path)
print(files)
sdp = surveyedDataProcessor("fake address", files)
assert sdp.condition_report is not None, "Condition Report is missing"
assert sdp.energy_report is not None, "Energy Report pdf is missing"
missing_items = []
if sdp.condition_report is None:
missing_items.append("Condition Report")
if sdp.energy_report is None:
missing_items.append("Energy Report PDF")
if sdp.rd_sap_xml is None:
missing_items.append("RDSAP XML")
if sdp.lig_sap_xml is None:
missing_items.append("LIG SAP XML")
if missing_items:
raise ValueError(f"Missing required items: {', '.join(missing_items)}")
except Exception as e:
raise ValueError(str(e))

View file

@ -1,3 +1,7 @@
"""
This script runs for deals that are at the 'surveyed-needs sign off' stage in HubSpot.
"""
import os
from pprint import pprint
@ -15,6 +19,7 @@ hubspotClient = HubSpotClient()
deals = hubspotClient.get_deals_from_deal_stage(DealStage.SURVEYED_COMPLETE_NEEDS_SIGN_OFF)
for deal in deals:
hubspotClient.move_deals_to_different_stage([deal.deal_id], DealStage.SURVEYED_COMPLETED_SIGNED_OFF.value)
# TODO sanity address check
# TODO load
# TODO load when we are at 'ready to co-ordination' - script!

View file

@ -5,7 +5,7 @@ os.environ["SHAREPOINT_TENANT_ID"] = "c3f7519c-2719-4547-af04-6da6cbfd8f8f"
os.environ["JJC_SERVICE_SHAREPOINT_ID"] = "7fdd0485-bbf3-4b29-b30f-98c81c2a6284"
from etl.scraper.scraper import SharePointScraper, SharePointInstaller, WEEK_COMMENCING
from pprint import pformat
from etl.pdfReader.pdfReaderToText import pdfReaderToText
from etl.fileReader.pdfReaderToText import pdfReaderToText
from etl.surveyedData.surveryedData import surveyedDataProcessor
import pandas as pd
import math

View file

@ -5,7 +5,7 @@ from datetime import datetime
from pydantic import EmailStr
from sqlalchemy import Column
from sqlalchemy.dialects.postgresql import UUID
from etl.pdfReader.reportType import ReportType
from etl.fileReader.reportType import ReportType
class BaseModel(SQLModel):
id: uuid.UUID = Field(default_factory=uuid.uuid4, primary_key=True)

View file

@ -1,6 +1,6 @@
from etl.scraper.scraper import SharePointScraper, SharePointInstaller, WEEK_COMMENCING
from pprint import pformat
from etl.pdfReader.pdfReaderToText import pdfReaderToText
from etl.fileReader.pdfReaderToText import pdfReaderToText
from etl.surveyedData.surveryedData import surveyedDataProcessor
import pandas as pd

View file

@ -1,6 +1,6 @@
from etl.scraper.scraper import SharePointScraper, SharePointInstaller, WEEK_COMMENCING
from pprint import pformat
from etl.pdfReader.pdfReaderToText import pdfReaderToText
from etl.fileReader.pdfReaderToText import pdfReaderToText
from etl.surveyedData.surveryedData import surveyedDataProcessor
import pandas as pd

View file

@ -1,5 +1,6 @@
from etl.pdfReader.pdfReaderToText import pdfReaderToText
from etl.pdfReader.reportType import ReportType
from etl.fileReader.pdfReaderToText import pdfReaderToText
from etl.fileReader.xmlReader import xmlReader
from etl.fileReader.reportType import ReportType
import math
from xml.dom.minidom import parseString
from etl.models.preSiteNoteTypes import (
@ -41,6 +42,10 @@ class surveyedDataProcessor():
self.condition_report = None
self.hubspot_deal_id = None
self.energy_report = None
self.full_sap_xml = None
self.lig_sap_xml = None
self.rd_sap_xml = None
self.identify_files()
@ -62,8 +67,15 @@ class surveyedDataProcessor():
elif pdf.type == ReportType.RDSAP_ENERGY_REPORT:
self.energy_report = pdf.get_reader()
elif file.lower().endswith('.xml'):
print(f"identified an xml file {file.lower()}")
pass
xml = xmlReader(file)
if xml:
if xml.type is ReportType.FULLSAP_XML:
self.full_sap_xml = xml.xml_obj
elif xml.type is ReportType.LIG_XML:
self.lig_sap_xml = xml.xml_obj
elif xml.type is ReportType.RDSAP_XML:
self.rd_sap_xml = xml.xml_obj
def load_condition_report(self, db_session):
general_information = self.load_general_information_from_condition_report(db_session)

View file

@ -2,8 +2,8 @@ import os
import logging
from etl.utils.logger import Logger
import re
from etl.pdfReader.pdfReaderToText import pdfReaderToText
from etl.pdfReader.reportType import ReportType
from etl.fileReader.pdfReaderToText import pdfReaderToText
from etl.fileReader.reportType import ReportType
class DomnaSharePointValidator():