From 43b6af9ddb8963c25192d45c43a1597f25beb2d7 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 13 Mar 2025 12:18:38 +0000 Subject: [PATCH] types --- etl/pdfReader/sitenotes.py | 22 +++++++++++++++------ etl/transform/types.py | 6 +++++- poetry.lock | 40 +++++++++++++++++++++++++++++++++++++- pyproject.toml | 2 +- 4 files changed, 61 insertions(+), 9 deletions(-) diff --git a/etl/pdfReader/sitenotes.py b/etl/pdfReader/sitenotes.py index 941cc3e..0c5a3dd 100644 --- a/etl/pdfReader/sitenotes.py +++ b/etl/pdfReader/sitenotes.py @@ -100,6 +100,10 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor): property_tenure = get_value('Property Tenure'), transaction_type = get_value('Transaction Type'), inspection_date = datetime.strptime(get_value('Inspection Date'), '%d %B %Y'), + current_sap = get_value('Current SAP rating'), + potential_sap = get_value('Potential SAP rating'), + current_ei = get_value('Current EI rating'), + potential_ei = get_value('Potential EI rating'), ) @@ -119,13 +123,19 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor): index = self.get_x_occurance(self.raw_data, "Address") if index: - self.assessor_address = self.raw_data[index + 1] + assessor_address = self.raw_data[index + 1] else: - self.assessor_address = None - self.current_sap_rating = get_value('Current SAP rating') - self.potential_sap_rating = get_value('Potential SAP rating') - self.current_ei_rating = get_value('Current EI rating') - self.potential_ei_rating = get_value('Potential EI rating') + assessor_address = None + + self.assessor_information = AssessorInfo( + accreditation_number = get_value("Assessor’s accreditation number"), + name = get_value("Assessor’s name"), + phone_number = get_value("Phone number"), + email_address = get_value("E-mail address"), + address = assessor_address, + ) + + self.current_annual_emissions = get_value('Current annual emissions') self.current_annual_energy_costs = get_value('Current annual energy costs') diff --git a/etl/transform/types.py b/etl/transform/types.py index 65be368..75d14bb 100644 --- a/etl/transform/types.py +++ b/etl/transform/types.py @@ -9,7 +9,7 @@ class CompanyInfo(BaseModel): fax_number: Optional[str] = None related_party_disclosure: Optional[str] = None - @field_validator('related_party_disclosure', pre=True, always=True) + @field_validator('related_party_disclosure') def set_none_if_none_of_the_above(cls, v): if v == "None of the above": return None @@ -27,6 +27,10 @@ class SurverySummaryInfo(BaseModel): property_tenure: str transaction_type: str inspection_date: datetime + current_sap: str + potential_sap: str + current_ei: str + potential_ei: str class AssessorInfo(BaseModel): accreditation_number: str diff --git a/poetry.lock b/poetry.lock index b9648b9..90ea6c7 100644 --- a/poetry.lock +++ b/poetry.lock @@ -373,6 +373,43 @@ files = [ {file = "decorator-5.2.1.tar.gz", hash = "sha256:65f266143752f734b0a7cc83c46f4618af75b8c5911b00ccb61d0ac9b6da0360"}, ] +[[package]] +name = "dnspython" +version = "2.7.0" +description = "DNS toolkit" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "dnspython-2.7.0-py3-none-any.whl", hash = "sha256:b4c34b7d10b51bcc3a5071e7b8dee77939f1e878477eeecc965e9835f63c6c86"}, + {file = "dnspython-2.7.0.tar.gz", hash = "sha256:ce9c432eda0dc91cf618a5cedf1a4e142651196bbcd2c80e89ed5a907e5cfaf1"}, +] + +[package.extras] +dev = ["black (>=23.1.0)", "coverage (>=7.0)", "flake8 (>=7)", "hypercorn (>=0.16.0)", "mypy (>=1.8)", "pylint (>=3)", "pytest (>=7.4)", "pytest-cov (>=4.1.0)", "quart-trio (>=0.11.0)", "sphinx (>=7.2.0)", "sphinx-rtd-theme (>=2.0.0)", "twine (>=4.0.0)", "wheel (>=0.42.0)"] +dnssec = ["cryptography (>=43)"] +doh = ["h2 (>=4.1.0)", "httpcore (>=1.0.0)", "httpx (>=0.26.0)"] +doq = ["aioquic (>=1.0.0)"] +idna = ["idna (>=3.7)"] +trio = ["trio (>=0.23)"] +wmi = ["wmi (>=1.5.1)"] + +[[package]] +name = "email-validator" +version = "2.2.0" +description = "A robust email address syntax and deliverability validation library." +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "email_validator-2.2.0-py3-none-any.whl", hash = "sha256:561977c2d73ce3611850a06fa56b414621e0c8faa9d66f2611407d87465da631"}, + {file = "email_validator-2.2.0.tar.gz", hash = "sha256:cb690f344c617a714f22e66ae771445a1ceb46821152df8e165c5f9a364582b7"}, +] + +[package.dependencies] +dnspython = ">=2.0.0" +idna = ">=2.0.0" + [[package]] name = "executing" version = "2.2.0" @@ -898,6 +935,7 @@ files = [ [package.dependencies] annotated-types = ">=0.6.0" +email-validator = {version = ">=2.0.0", optional = true, markers = "extra == \"email\""} pydantic-core = "2.27.2" typing-extensions = ">=4.12.2" @@ -1398,4 +1436,4 @@ files = [ [metadata] lock-version = "2.1" python-versions = ">=3.12" -content-hash = "24dce297bf4e2bef44ccec4496768e5cbcefbf7678a88293e15b1251bc657363" +content-hash = "710051703d97e156a540ad08b0815338a4283146f6fca3c0ae89cc4e6dad459a" diff --git a/pyproject.toml b/pyproject.toml index 8c3a789..4d3d01b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,7 @@ dependencies = [ "pymupdf (>=1.25.3,<2.0.0)", "msal (>=1.31.1,<2.0.0)", "pandas (>=2.2.3,<3.0.0)", - "pydantic (>=2.10.6,<3.0.0)", + "pydantic[email] (>=2.10.6,<3.0.0)", ] [tool.poetry]