diff --git a/.db-env b/.db-env index 2874869..d5a81db 100644 --- a/.db-env +++ b/.db-env @@ -3,4 +3,5 @@ POSTGRES_PASSWORD=makingwarmhomes POSTGRES_HOST=localhost POSTGRES_PORT=5432 PGADMIN_DEFAULT_EMAIL=junte@domna.homes -PGADMIN_DEFAULT_PASSWORD=makingwarmhomes \ No newline at end of file +PGADMIN_DEFAULT_PASSWORD=makingwarmhomes +DATABASE_URL=postgresql://postgres:makingwarmhomes@db:5432/postgres \ No newline at end of file diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 50bff3f..880cfb3 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -17,7 +17,8 @@ "ms-python.python", "ms-toolsai.jupyter", "mechatroner.rainbow-csv", - "ms-toolsai.datawrangler" + "ms-toolsai.datawrangler", + "lindacong.vscode-book-reader" ] } } diff --git a/.devcontainer/docker-compose.yml b/.devcontainer/docker-compose.yml index 3213e61..ec14ffc 100644 --- a/.devcontainer/docker-compose.yml +++ b/.devcontainer/docker-compose.yml @@ -7,6 +7,8 @@ services: context: .. dockerfile: .devcontainer/Dockerfile command: sleep infinity + env_file: + - ../.db-env volumes: - ..:/workspaces/survey-extractor:cached diff --git a/etl/load.py b/etl/load.py index 16bce71..d4b456a 100644 --- a/etl/load.py +++ b/etl/load.py @@ -1,3 +1,38 @@ # A very simple script to read a presite note and load to a postgres database +import os +from etl.surveyedData.surveryedData import surveyedDataProcessor +from etl.transform.types import get_db_session, init_db, AssessorInfo + +pre_site_note_path = os.path.join(os.getcwd(), "..", "example_data", "pre_site_note.pdf") + +survey_one = surveyedDataProcessor("123 Fake Street", [pre_site_note_path]) + +assert survey_one.pre_site_note.assessor_information.accreditation_number == "QUID211435" + +init_db() + +assessor1 = AssessorInfo( + accreditation_number=survey_one.pre_site_note.assessor_information.accreditation_number, + name=survey_one.pre_site_note.assessor_information.name, + phone_number=survey_one.pre_site_note.assessor_information.phone_number, + email_address=survey_one.pre_site_note.assessor_information.email_address, +) + +with get_db_session() as session: + session.add(assessor1) + session.commit() + + + + + + + + + +# Download a pdf file and save it in git for easier processes and tests +# scrape the correct data +# Pydantic structure +# Load to database successfully locally \ No newline at end of file diff --git a/etl/transform/types.py b/etl/transform/types.py index 9ab8463..dada10e 100644 --- a/etl/transform/types.py +++ b/etl/transform/types.py @@ -1,12 +1,36 @@ -from sqlmodel import Field, SQLModel, create_engine, Relationship -from uuid import uuid4 +from sqlmodel import Field, SQLModel, create_engine, Relationship, Session +import uuid from datetime import datetime from pydantic import field_validator, EmailStr +from pydantic_settings import BaseSettings from typing import Optional, List +from sqlalchemy import Column +from sqlalchemy.dialects.postgresql import UUID + +class Settings(BaseSettings): + DATABASE_URL: Optional[str] = None # Default to None if not set + + class Config: + env_file = ".env" # Load from an optional .env file + +settings = Settings() +engine = create_engine(settings.DATABASE_URL) if settings.DATABASE_URL else None + + +def get_db_session(): + if engine is None: + raise RuntimeError("Database is not configured. Set DATABASE_URL in environment variables.") + return Session(engine) + +def init_db(): + if engine: + SQLModel.metadata.create_all(engine) class BaseModel(SQLModel): - id: str = Field(default_factory=uuid4, primary_key=True, index=True, sa_column_kwargs={'type_': 'UUID'}) - + id: uuid.UUID = Field( + default_factory=uuid.uuid4, + sa_column=Column(UUID(as_uuid=True), primary_key=True) + ) class Dimension(BaseModel): floor_area_m2: float @@ -75,7 +99,7 @@ class Door(BaseModel): no_of_insulated_doors: int u_value_w_m2_k: Optional[str] -class AssessorInfo(BaseModel): +class AssessorInfo(BaseModel, table=True): accreditation_number: str name: str phone_number: Optional[str] = None diff --git a/example_data/pre_site_note.pdf b/example_data/pre_site_note.pdf new file mode 100644 index 0000000..8339e03 Binary files /dev/null and b/example_data/pre_site_note.pdf differ diff --git a/poetry.lock b/poetry.lock index 61ccb90..7ffe6d7 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1206,6 +1206,27 @@ files = [ [package.dependencies] typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" +[[package]] +name = "pydantic-settings" +version = "2.8.1" +description = "Settings management using Pydantic" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "pydantic_settings-2.8.1-py3-none-any.whl", hash = "sha256:81942d5ac3d905f7f3ee1a70df5dfb62d5569c12f51a5a647defc1c3d9ee2e9c"}, + {file = "pydantic_settings-2.8.1.tar.gz", hash = "sha256:d5c663dfbe9db9d5e1c646b2e161da12f0d734d422ee56f567d0ea2cee4e8585"}, +] + +[package.dependencies] +pydantic = ">=2.7.0" +python-dotenv = ">=0.21.0" + +[package.extras] +azure-key-vault = ["azure-identity (>=1.16.0)", "azure-keyvault-secrets (>=4.8.0)"] +toml = ["tomli (>=2.0.1)"] +yaml = ["pyyaml (>=6.0.1)"] + [[package]] name = "pygments" version = "2.19.1" @@ -1275,6 +1296,21 @@ files = [ [package.dependencies] six = ">=1.5" +[[package]] +name = "python-dotenv" +version = "1.1.0" +description = "Read key-value pairs from a .env file and set them as environment variables" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "python_dotenv-1.1.0-py3-none-any.whl", hash = "sha256:d7c01d9e2293916c18baf562d95698754b0dbbb5e74d457c45d4f6561fb9d55d"}, + {file = "python_dotenv-1.1.0.tar.gz", hash = "sha256:41f90bc6f5f177fb41f53e87666db362025010eb28f60a01c9143bfa33a2b2d5"}, +] + +[package.extras] +cli = ["click (>=5.0)"] + [[package]] name = "pytz" version = "2025.1" @@ -1698,4 +1734,4 @@ files = [ [metadata] lock-version = "2.1" python-versions = ">=3.12" -content-hash = "8d7db7d1395e8a16703b9b23de79f9d0e53307dea379e229a26081bc301aaf93" +content-hash = "f49b1428e8390de0d5989fcd7319d0f1e54d29127e967a3a7d9c07acd74e6d39" diff --git a/pyproject.toml b/pyproject.toml index 0ec3f55..b80566d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,7 @@ dependencies = [ "fuzzywuzzy (>=0.18.0,<0.19.0)", "sqlmodel (>=0.0.24,<0.0.25)", "psycopg2 (>=2.9.10,<3.0.0)", + "pydantic-settings (>=2.8.1,<3.0.0)", ] [tool.poetry]