proof of concept works

This commit is contained in:
Jun-te Kim 2025-03-26 16:49:14 +00:00
parent b8a0548798
commit fbc6e76845
8 changed files with 108 additions and 8 deletions

View file

@ -3,4 +3,5 @@ POSTGRES_PASSWORD=makingwarmhomes
POSTGRES_HOST=localhost
POSTGRES_PORT=5432
PGADMIN_DEFAULT_EMAIL=junte@domna.homes
PGADMIN_DEFAULT_PASSWORD=makingwarmhomes
PGADMIN_DEFAULT_PASSWORD=makingwarmhomes
DATABASE_URL=postgresql://postgres:makingwarmhomes@db:5432/postgres

View file

@ -17,7 +17,8 @@
"ms-python.python",
"ms-toolsai.jupyter",
"mechatroner.rainbow-csv",
"ms-toolsai.datawrangler"
"ms-toolsai.datawrangler",
"lindacong.vscode-book-reader"
]
}
}

View file

@ -7,6 +7,8 @@ services:
context: ..
dockerfile: .devcontainer/Dockerfile
command: sleep infinity
env_file:
- ../.db-env
volumes:
- ..:/workspaces/survey-extractor:cached

View file

@ -1,3 +1,38 @@
# A very simple script that reads a pre-site note and loads it into a Postgres database
import os
from etl.surveyedData.surveryedData import surveyedDataProcessor
from etl.transform.types import get_db_session, init_db, AssessorInfo
# The example PDF is resolved relative to the current working directory.
pre_site_note_path = os.path.join(os.getcwd(), "..", "example_data", "pre_site_note.pdf")
survey_one = surveyedDataProcessor("123 Fake Street", [pre_site_note_path])

# Alias the extracted assessor section so the field mapping below stays short.
assessor_details = survey_one.pre_site_note.assessor_information

# Sanity check: the extracted accreditation number must match the expected one.
assert assessor_details.accreditation_number == "QUID211435"

# Create the tables before inserting anything.
init_db()

assessor1 = AssessorInfo(
    accreditation_number=assessor_details.accreditation_number,
    name=assessor_details.name,
    phone_number=assessor_details.phone_number,
    email_address=assessor_details.email_address,
)

# Persist the assessor row inside a session context.
with get_db_session() as session:
    session.add(assessor1)
    session.commit()
# Download a PDF file and commit it to git for easier processing and testing
# Scrape the correct data
# Define the Pydantic structure
# Load to the database successfully locally

View file

@ -1,12 +1,36 @@
from sqlmodel import Field, SQLModel, create_engine, Relationship
from uuid import uuid4
from sqlmodel import Field, SQLModel, create_engine, Relationship, Session
import uuid
from datetime import datetime
from pydantic import field_validator, EmailStr
from pydantic_settings import BaseSettings
from typing import Optional, List
from sqlalchemy import Column
from sqlalchemy.dialects.postgresql import UUID
class Settings(BaseSettings):
    """Application settings for the database connection.

    Fields are populated by ``BaseSettings``; an optional ``.env`` file is
    also consulted.
    """

    # ``model_config`` replaces the nested ``class Config``, which is the
    # Pydantic v1 style and is deprecated in Pydantic v2.
    model_config = {"env_file": ".env"}  # Load from an optional .env file

    # Database connection URL; stays None when it is not set.
    DATABASE_URL: Optional[str] = None
# Settings are resolved once, when this module is imported.
settings = Settings()

# Only build an engine when a database URL is configured; otherwise keep
# ``engine`` as None so callers can detect the missing configuration.
if settings.DATABASE_URL:
    engine = create_engine(settings.DATABASE_URL)
else:
    engine = None
def get_db_session():
    """Return a new ``Session`` bound to the module-level engine.

    Raises:
        RuntimeError: If the module-level engine is None.
    """
    if engine is not None:
        return Session(engine)
    raise RuntimeError("Database is not configured. Set DATABASE_URL in environment variables.")
def init_db():
    """Create all tables registered on ``SQLModel.metadata``.

    Does nothing when no engine is configured.
    """
    if not engine:
        return
    SQLModel.metadata.create_all(engine)
class BaseModel(SQLModel):
    """Common base for the models in this module; supplies a UUID primary key."""

    # Declared once, through plain ``Field`` arguments rather than
    # ``sa_column=Column(...)``: a SQLAlchemy ``Column`` instance can belong
    # to only one table, so a column object on a shared base breaks as soon as
    # a second subclass is declared with ``table=True``.
    id: uuid.UUID = Field(default_factory=uuid.uuid4, primary_key=True)
class Dimension(BaseModel):
floor_area_m2: float
@ -75,7 +99,7 @@ class Door(BaseModel):
no_of_insulated_doors: int
u_value_w_m2_k: Optional[str]
class AssessorInfo(BaseModel):
class AssessorInfo(BaseModel, table=True):
accreditation_number: str
name: str
phone_number: Optional[str] = None

Binary file not shown.

38
poetry.lock generated
View file

@ -1206,6 +1206,27 @@ files = [
[package.dependencies]
typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0"
[[package]]
name = "pydantic-settings"
version = "2.8.1"
description = "Settings management using Pydantic"
optional = false
python-versions = ">=3.8"
groups = ["main"]
files = [
{file = "pydantic_settings-2.8.1-py3-none-any.whl", hash = "sha256:81942d5ac3d905f7f3ee1a70df5dfb62d5569c12f51a5a647defc1c3d9ee2e9c"},
{file = "pydantic_settings-2.8.1.tar.gz", hash = "sha256:d5c663dfbe9db9d5e1c646b2e161da12f0d734d422ee56f567d0ea2cee4e8585"},
]
[package.dependencies]
pydantic = ">=2.7.0"
python-dotenv = ">=0.21.0"
[package.extras]
azure-key-vault = ["azure-identity (>=1.16.0)", "azure-keyvault-secrets (>=4.8.0)"]
toml = ["tomli (>=2.0.1)"]
yaml = ["pyyaml (>=6.0.1)"]
[[package]]
name = "pygments"
version = "2.19.1"
@ -1275,6 +1296,21 @@ files = [
[package.dependencies]
six = ">=1.5"
[[package]]
name = "python-dotenv"
version = "1.1.0"
description = "Read key-value pairs from a .env file and set them as environment variables"
optional = false
python-versions = ">=3.9"
groups = ["main"]
files = [
{file = "python_dotenv-1.1.0-py3-none-any.whl", hash = "sha256:d7c01d9e2293916c18baf562d95698754b0dbbb5e74d457c45d4f6561fb9d55d"},
{file = "python_dotenv-1.1.0.tar.gz", hash = "sha256:41f90bc6f5f177fb41f53e87666db362025010eb28f60a01c9143bfa33a2b2d5"},
]
[package.extras]
cli = ["click (>=5.0)"]
[[package]]
name = "pytz"
version = "2025.1"
@ -1698,4 +1734,4 @@ files = [
[metadata]
lock-version = "2.1"
python-versions = ">=3.12"
content-hash = "8d7db7d1395e8a16703b9b23de79f9d0e53307dea379e229a26081bc301aaf93"
content-hash = "f49b1428e8390de0d5989fcd7319d0f1e54d29127e967a3a7d9c07acd74e6d39"

View file

@ -16,6 +16,7 @@ dependencies = [
"fuzzywuzzy (>=0.18.0,<0.19.0)",
"sqlmodel (>=0.0.24,<0.0.25)",
"psycopg2 (>=2.9.10,<3.0.0)",
"pydantic-settings (>=2.8.1,<3.0.0)",
]
[tool.poetry]