mirror of
https://github.com/Hestia-Homes/survey-extraction.git
synced 2026-06-08 11:17:29 +00:00
surveyed sign off
This commit is contained in:
parent
1e467cfd56
commit
4075fbaa3c
14 changed files with 99 additions and 20 deletions
|
|
@ -1,7 +1,7 @@
|
|||
name: Deal Notes From HubSpot Scraper
|
||||
name: Daily Surved
|
||||
on:
|
||||
schedule:
|
||||
- cron: '0 19 * * 0'
|
||||
- cron: '0 17 * * 1-5'
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
|
|
@ -24,6 +24,6 @@ jobs:
|
|||
run: |
|
||||
pwd
|
||||
ls -la
|
||||
poetry run python etl/dimitra_hubspot_notes_gather.py
|
||||
poetry run python etl/hubspot_surveyed_needs_sign_off.py
|
||||
env:
|
||||
PYTHONPATH: ${{ github.workspace }}
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
import os
|
||||
from pdfReader.pdfReaderToText import pdfReaderToText
|
||||
from fileReader.pdfReaderToText import pdfReaderToText
|
||||
from etl.scraper.scraper import SharePointScraper, SharePointInstaller, WEEK_COMMENCING
|
||||
from pprint import pprint, pformat
|
||||
import logging
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
from etl.scraper.scraper import SharePointScraper, SharePointInstaller
|
||||
from pprint import pformat
|
||||
from etl.pdfReader.pdfReaderToText import pdfReaderToText
|
||||
from etl.fileReader.pdfReaderToText import pdfReaderToText
|
||||
from etl.surveyedData.surveryedData import surveyedDataProcessor
|
||||
import pandas as pd
|
||||
|
||||
|
|
|
|||
|
|
@ -10,3 +10,6 @@ class ReportType(Enum):
|
|||
ECO_CONDITION_REPORT = "osmosis_condition_pas_2035_report"
|
||||
WARM_HOMES_CONDITION_REPORT = "warm_homes_condition_pas_2035_report"
|
||||
RDSAP_ENERGY_REPORT = "rdsap_energy_report"
|
||||
LIG_XML = "lodgement_xml_needed_for_lodgement_to_like_trademark"
|
||||
RDSAP_XML = "reduce_xml_needed_to_generate_full_sap_xml"
|
||||
FULLSAP_XML = "full_xml_needed_for_co_ordination"
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
from etl.pdfReader.reportType import ReportType
|
||||
from etl.fileReader.reportType import ReportType
|
||||
from etl.transform.preSiteNoteTypes import (
|
||||
CompanyInfo, PreSiteNotesSummaryInfo, AssessorInfo,
|
||||
PropertyDescription, PropertyDetail, Dimension,
|
||||
|
|
|
|||
43
etl/fileReader/xmlReader.py
Normal file
43
etl/fileReader/xmlReader.py
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
from etl.utils.logger import Logger
|
||||
import logging
|
||||
from xml.dom.minidom import parse
|
||||
import os
|
||||
from etl.fileReader.reportType import ReportType
|
||||
|
||||
class xmlReader():
    """Load an XML file from disk and classify it by its root element name.

    The file is parsed as soon as the object is constructed. Afterwards:

    * ``xml_obj`` holds the parsed ``xml.dom.minidom`` document, or ``None``
      when the file is missing or could not be parsed.
    * ``type`` holds the matching ``ReportType`` member, or ``None`` when the
      root element is not one of the recognised names.

    Construction does not raise for a missing or malformed file: problems
    are logged and both attributes are left as ``None``.
    """

    def __init__(self, file_path):
        """Remember ``file_path`` and immediately attempt to parse it."""
        self.source_path = file_path
        self.logger = Logger(name='xmlReader', level=logging.INFO).get_logger()
        self.xml_obj = None
        self.type = None
        self.get_xml_obj()

    def get_xml_obj(self):
        """Parse ``self.source_path`` and store the document on ``self.xml_obj``.

        Returns:
            The parsed minidom document, or ``None`` if the file does not
            exist or parsing failed (the failure is logged, not raised).
        """
        try:
            if not os.path.exists(self.source_path):
                self.logger.error(f"File not found: {self.source_path}")
                return None

            # Open in binary mode so the XML parser decodes the bytes using
            # the encoding named in the document's own XML declaration.
            # Forcing a utf-8 text decode here rejects otherwise valid
            # files that declare e.g. ISO-8859-1 or UTF-16.
            with open(self.source_path, 'rb') as file:
                self.xml_obj = parse(file)

            self.get_type()
            return self.xml_obj

        except Exception as e:
            # Deliberate best-effort: a bad file must not abort the caller,
            # so log the problem and report "nothing parsed".
            self.logger.error(f"Failed to parse XML file {self.source_path}: {e}")
            self.xml_obj = None
            self.type = None
            return self.xml_obj

    def get_type(self):
        """Set ``self.type`` from the document's root element name.

        The comparison is case-insensitive. When no document is loaded, or
        the root element is not recognised, ``self.type`` is left unchanged.

        Returns:
            The current value of ``self.type``.
        """
        if self.xml_obj is None:
            # Nothing was parsed, so there is no root element to inspect.
            return self.type

        xmlHeaderName = self.xml_obj.documentElement.tagName.lower()

        if xmlHeaderName == 'RdSap-Report'.lower():
            self.type = ReportType.LIG_XML
        elif xmlHeaderName == "SurveyRec".lower():
            self.type = ReportType.RDSAP_XML
        elif xmlHeaderName == "ImportExportRecord".lower():
            self.type = ReportType.FULLSAP_XML
        else:
            # Unknown document kind: leave the type as-is, but make the
            # skipped file visible in the logs instead of ignoring it.
            self.logger.warning(
                f"Unrecognised XML root element '{xmlHeaderName}' in {self.source_path}"
            )

        return self.type
|
||||
|
|
@ -71,11 +71,27 @@ class SubmissionInfoFromDeal(BaseModel):
|
|||
|
||||
# download files in url and check files are there:
|
||||
try:
|
||||
|
||||
files = sp.download_files_from_path(path)
|
||||
print(files)
|
||||
sdp = surveyedDataProcessor("fake address", files)
|
||||
assert sdp.condition_report is not None, "Condition Report is missing"
|
||||
assert sdp.energy_report is not None, "Energy Report pdf is missing"
|
||||
missing_items = []
|
||||
|
||||
if sdp.condition_report is None:
|
||||
missing_items.append("Condition Report")
|
||||
|
||||
if sdp.energy_report is None:
|
||||
missing_items.append("Energy Report PDF")
|
||||
|
||||
if sdp.rd_sap_xml is None:
|
||||
missing_items.append("RDSAP XML")
|
||||
|
||||
if sdp.lig_sap_xml is None:
|
||||
missing_items.append("LIG SAP XML")
|
||||
|
||||
if missing_items:
|
||||
raise ValueError(f"Missing required items: {', '.join(missing_items)}")
|
||||
|
||||
except Exception as e:
|
||||
raise ValueError(str(e))
|
||||
|
||||
|
|
|
|||
|
|
@ -1,3 +1,7 @@
|
|||
"""
|
||||
This is the script that runs when we are at the 'surveyed-needs sign off' stage within hubspot
|
||||
"""
|
||||
|
||||
import os
|
||||
from pprint import pprint
|
||||
|
||||
|
|
@ -15,6 +19,7 @@ hubspotClient = HubSpotClient()
|
|||
deals = hubspotClient.get_deals_from_deal_stage(DealStage.SURVEYED_COMPLETE_NEEDS_SIGN_OFF)
|
||||
|
||||
|
||||
for deal in deals:
|
||||
hubspotClient.move_deals_to_different_stage([deal.deal_id], DealStage.SURVEYED_COMPLETED_SIGNED_OFF.value)
|
||||
|
||||
# TODO sanity address check
|
||||
# TODO load
|
||||
# TODO load when we are at 'ready to co-ordination' - script!
|
||||
|
|
@ -5,7 +5,7 @@ os.environ["SHAREPOINT_TENANT_ID"] = "c3f7519c-2719-4547-af04-6da6cbfd8f8f"
|
|||
os.environ["JJC_SERVICE_SHAREPOINT_ID"] = "7fdd0485-bbf3-4b29-b30f-98c81c2a6284"
|
||||
from etl.scraper.scraper import SharePointScraper, SharePointInstaller, WEEK_COMMENCING
|
||||
from pprint import pformat
|
||||
from etl.pdfReader.pdfReaderToText import pdfReaderToText
|
||||
from etl.fileReader.pdfReaderToText import pdfReaderToText
|
||||
from etl.surveyedData.surveryedData import surveyedDataProcessor
|
||||
import pandas as pd
|
||||
import math
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ from datetime import datetime
|
|||
from pydantic import EmailStr
|
||||
from sqlalchemy import Column
|
||||
from sqlalchemy.dialects.postgresql import UUID
|
||||
from etl.pdfReader.reportType import ReportType
|
||||
from etl.fileReader.reportType import ReportType
|
||||
|
||||
class BaseModel(SQLModel):
|
||||
id: uuid.UUID = Field(default_factory=uuid.uuid4, primary_key=True)
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
from etl.scraper.scraper import SharePointScraper, SharePointInstaller, WEEK_COMMENCING
|
||||
from pprint import pformat
|
||||
from etl.pdfReader.pdfReaderToText import pdfReaderToText
|
||||
from etl.fileReader.pdfReaderToText import pdfReaderToText
|
||||
from etl.surveyedData.surveryedData import surveyedDataProcessor
|
||||
import pandas as pd
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
from etl.scraper.scraper import SharePointScraper, SharePointInstaller, WEEK_COMMENCING
|
||||
from pprint import pformat
|
||||
from etl.pdfReader.pdfReaderToText import pdfReaderToText
|
||||
from etl.fileReader.pdfReaderToText import pdfReaderToText
|
||||
from etl.surveyedData.surveryedData import surveyedDataProcessor
|
||||
import pandas as pd
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
from etl.pdfReader.pdfReaderToText import pdfReaderToText
|
||||
from etl.pdfReader.reportType import ReportType
|
||||
from etl.fileReader.pdfReaderToText import pdfReaderToText
|
||||
from etl.fileReader.xmlReader import xmlReader
|
||||
from etl.fileReader.reportType import ReportType
|
||||
import math
|
||||
from xml.dom.minidom import parseString
|
||||
from etl.models.preSiteNoteTypes import (
|
||||
|
|
@ -41,6 +42,10 @@ class surveyedDataProcessor():
|
|||
self.condition_report = None
|
||||
self.hubspot_deal_id = None
|
||||
self.energy_report = None
|
||||
self.full_sap_xml = None
|
||||
self.lig_sap_xml = None
|
||||
self.rd_sap_xml = None
|
||||
|
||||
self.identify_files()
|
||||
|
||||
|
||||
|
|
@ -62,8 +67,15 @@ class surveyedDataProcessor():
|
|||
elif pdf.type == ReportType.RDSAP_ENERGY_REPORT:
|
||||
self.energy_report = pdf.get_reader()
|
||||
elif file.lower().endswith('.xml'):
|
||||
print(f"identified an xml file {file.lower()}")
|
||||
pass
|
||||
xml = xmlReader(file)
|
||||
if xml:
|
||||
if xml.type is ReportType.FULLSAP_XML:
|
||||
self.full_sap_xml = xml.xml_obj
|
||||
|
||||
elif xml.type is ReportType.LIG_XML:
|
||||
self.lig_sap_xml = xml.xml_obj
|
||||
elif xml.type is ReportType.RDSAP_XML:
|
||||
self.rd_sap_xml = xml.xml_obj
|
||||
|
||||
def load_condition_report(self, db_session):
|
||||
general_information = self.load_general_information_from_condition_report(db_session)
|
||||
|
|
|
|||
|
|
@ -2,8 +2,8 @@ import os
|
|||
import logging
|
||||
from etl.utils.logger import Logger
|
||||
import re
|
||||
from etl.pdfReader.pdfReaderToText import pdfReaderToText
|
||||
from etl.pdfReader.reportType import ReportType
|
||||
from etl.fileReader.pdfReaderToText import pdfReaderToText
|
||||
from etl.fileReader.reportType import ReportType
|
||||
|
||||
|
||||
class DomnaSharePointValidator():
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue