mirror of
https://github.com/Hestia-Homes/survey-extraction.git
synced 2026-06-30 13:10:56 +00:00
adding types and transforming my data
This commit is contained in:
parent
85b7b1d772
commit
c7753c0ed6
3 changed files with 64 additions and 26 deletions
|
|
@ -16,6 +16,7 @@ pdfReader = pdfReaderToText(DATA_LOC_1)
|
|||
doc2 = pdfReader.get_reader()
|
||||
pdfReader2 = pdfReaderToText(DATA_LOC_2)
|
||||
doc1 = pdfReader2.get_reader()
|
||||
print(doc1.survey_information)
|
||||
|
||||
# Transform
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
from etl.pdfReader.reportType import ReportType
|
||||
from transform.types import CompanyInformation
|
||||
from transform.types import CompanyInfo, SurverySummaryInfo, AssessorInfo
|
||||
from datetime import datetime
|
||||
|
||||
class SiteNotesExtractor():
|
||||
def __init__(self, data_list):
|
||||
|
|
@ -20,8 +21,9 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor):
|
|||
def __init__(self, data_list):
    """Initialise a Quidos site-notes extractor and run the extraction.

    Args:
        data_list: Raw report data, forwarded unchanged to the parent
            ``SiteNotesExtractor`` initialiser.
    """
    super().__init__(data_list)
    self.type = ReportType.QUIDOS_SITE_NOTE
    # Declare the result attributes up front so they always exist on the
    # instance; get_summary_information() assigns the real model objects.
    self.company_information = None
    self.survey_information = None
    self.assessor_information = None
    # Call setup() exactly once, and only after the placeholders above are
    # in place. Calling it before them (or twice) would let the None
    # assignments overwrite values it had already extracted.
    self.setup()
|
||||
|
||||
def setup(self):
|
||||
"""
|
||||
|
|
@ -53,7 +55,7 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor):
|
|||
self.get_section_20()
|
||||
self.get_section_21()
|
||||
self.get_section_22()
|
||||
|
||||
|
||||
def get_summary_information(self):
|
||||
# Summary Information
|
||||
avoid = [
|
||||
|
|
@ -85,28 +87,35 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor):
|
|||
'Emission figures including 9.92 emission factor of 0.925',
|
||||
]
|
||||
get_value = lambda key: None if self.raw_data[self.raw_data.index(key) + 1] in avoid else self.raw_data[self.raw_data.index(key) + 1]
|
||||
|
||||
self.survey_information = SurverySummaryInfo(
|
||||
reference_number = get_value('Reference Number'),
|
||||
epc_language = get_value('EPC Language'),
|
||||
uprn = get_value('UPRN'),
|
||||
postcode = get_value('Postcode'),
|
||||
region = get_value('Region'),
|
||||
address = get_value('Address'),
|
||||
town = get_value('Town'),
|
||||
county = get_value('County'),
|
||||
property_tenure = get_value('Property Tenure'),
|
||||
transaction_type = get_value('Transaction Type'),
|
||||
inspection_date = datetime.strptime(get_value('Inspection Date'), '%d %B %Y'),
|
||||
)
|
||||
|
||||
self.reference_number = get_value('Reference Number')
|
||||
self.epc_language = get_value('EPC Language')
|
||||
self.UPRN = get_value('UPRN')
|
||||
self.postcode = get_value('Postcode')
|
||||
self.region = get_value('Region')
|
||||
self.address = get_value('Address')
|
||||
self.town = get_value('Town')
|
||||
self.country = get_value('County')
|
||||
self.property_tenure = get_value('Property Tenure')
|
||||
self.transaction_type = get_value('Transaction Type')
|
||||
self.inspection_date = get_value('Inspection Date')
|
||||
self.assessor_accrediation_number = get_value("Assessor’s accreditation number")
|
||||
self.company_info = CompanyInformation(
|
||||
|
||||
self.company_information = CompanyInfo(
|
||||
trading_name = get_value('Company name/trading name'),
|
||||
post_code = get_value('POST CODE'),
|
||||
fax_number = get_value('Fax number'),
|
||||
related_party_disclosure= get_value("Related party disclosure")
|
||||
related_party_disclosure = get_value("Related party disclosure")
|
||||
)
|
||||
|
||||
self.assessor_information = AssessorInfo(
|
||||
accreditation_number = get_value("Assessor’s accreditation number"),
|
||||
name = get_value("Assessor’s name"),
|
||||
phone_number = get_value("Phone number"),
|
||||
email_address = get_value("E-mail address"),
|
||||
)
|
||||
self.assessor_name = get_value("Assessor’s name")
|
||||
self.assessor_phone_number = get_value("Phone number")
|
||||
self.assessor_email_address = get_value("E-mail address")
|
||||
|
||||
index = self.get_x_occurance(self.raw_data, "Address")
|
||||
if index:
|
||||
|
|
|
|||
|
|
@ -1,8 +1,36 @@
|
|||
from pydantic import BaseModel
|
||||
from pydantic import BaseModel, constr, field_validator, EmailStr
|
||||
from typing import Optional, List
|
||||
import re
|
||||
from datetime import datetime
|
||||
|
||||
class CompanyInfo(BaseModel):
    """Validated company details extracted from a survey report.

    ``trading_name`` and ``post_code`` are required; the remaining fields
    default to ``None`` when the report does not provide them.
    """

    trading_name: str
    post_code: str
    fax_number: Optional[str] = None
    related_party_disclosure: Optional[str] = None

    # pydantic v2's ``field_validator`` does not accept the v1 ``pre`` /
    # ``always`` keywords (passing them raises TypeError when the class is
    # defined). ``mode='before'`` is the v2 spelling of ``pre=True``.
    # ``always=True`` is not needed: the field default is already None,
    # which is exactly what this validator would produce.
    @field_validator('related_party_disclosure', mode='before')
    @classmethod
    def set_none_if_none_of_the_above(cls, v):
        """Map the placeholder answer "None of the above" to ``None``.

        Args:
            v: Raw value supplied for ``related_party_disclosure``.

        Returns:
            ``None`` if ``v`` equals "None of the above", otherwise ``v``
            unchanged.
        """
        if v == "None of the above":
            return None
        return v
|
||||
|
||||
class SurverySummaryInfo(BaseModel):
    """Validated summary section of a survey report.

    Every field is required except ``county``, which defaults to ``None``.
    """

    # Report identification
    reference_number: str
    epc_language: str
    uprn: str

    # Property location
    postcode: str
    region: str
    address: str
    town: str
    county: Optional[str] = None

    # Tenure / transaction details
    property_tenure: str
    transaction_type: str

    # Date of the inspection, supplied as a datetime object
    inspection_date: datetime
|
||||
|
||||
class AssessorInfo(BaseModel):
    """Validated details of the assessor named in a survey report.

    ``accreditation_number`` and ``name`` are required; the contact fields
    default to ``None`` when absent.
    """

    # Mandatory identification
    accreditation_number: str
    name: str

    # Optional contact details
    phone_number: Optional[str] = None
    # NOTE(review): EmailStr validation depends on pydantic's optional
    # email-validator extra being installed - confirm it is a dependency.
    email_address: Optional[EmailStr] = None
|
||||
|
||||
class CompanyInformation(BaseModel):
    """Company details model in which every field may be ``None``.

    NOTE(review): this looks like the earlier counterpart of
    ``CompanyInfo`` - confirm whether it is still used before relying on it.
    """

    # The fields are nullable but declared without defaults.
    # NOTE(review): under pydantic v2 that presumably means each one must
    # still be passed explicitly (possibly as None) - verify.
    trading_name: Optional[str]
    post_code: Optional[str]
    fax_number: Optional[str]
    related_party_disclosure: Optional[str]
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue