mirror of
https://github.com/Hestia-Homes/survey-extraction.git
synced 2026-06-30 13:10:56 +00:00
proof of concept works
This commit is contained in:
parent
b8a0548798
commit
fbc6e76845
8 changed files with 108 additions and 8 deletions
3
.db-env
3
.db-env
|
|
@ -3,4 +3,5 @@ POSTGRES_PASSWORD=makingwarmhomes
|
|||
POSTGRES_HOST=localhost
|
||||
POSTGRES_PORT=5432
|
||||
PGADMIN_DEFAULT_EMAIL=junte@domna.homes
|
||||
PGADMIN_DEFAULT_PASSWORD=makingwarmhomes
|
||||
PGADMIN_DEFAULT_PASSWORD=makingwarmhomes
|
||||
DATABASE_URL=postgresql://postgres:makingwarmhomes@db:5432/postgres
|
||||
|
|
@ -17,7 +17,8 @@
|
|||
"ms-python.python",
|
||||
"ms-toolsai.jupyter",
|
||||
"mechatroner.rainbow-csv",
|
||||
"ms-toolsai.datawrangler"
|
||||
"ms-toolsai.datawrangler",
|
||||
"lindacong.vscode-book-reader"
|
||||
]
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,6 +7,8 @@ services:
|
|||
context: ..
|
||||
dockerfile: .devcontainer/Dockerfile
|
||||
command: sleep infinity
|
||||
env_file:
|
||||
- ../.db-env
|
||||
volumes:
|
||||
- ..:/workspaces/survey-extractor:cached
|
||||
|
||||
|
|
|
|||
35
etl/load.py
35
etl/load.py
|
|
@ -1,3 +1,38 @@
|
|||
# A very simple script that reads a pre-site note and loads it into a Postgres database
|
||||
import os
|
||||
from etl.surveyedData.surveryedData import surveyedDataProcessor
|
||||
from etl.transform.types import get_db_session, init_db, AssessorInfo
|
||||
|
||||
# Locate the sample PDF relative to this file instead of the current working
# directory, so the script resolves the same path wherever it is launched from.
pre_site_note_path = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    "..",
    "example_data",
    "pre_site_note.pdf",
)

survey_one = surveyedDataProcessor("123 Fake Street", [pre_site_note_path])

# Bind the nested record once; it is read several times below.
assessor_information = survey_one.pre_site_note.assessor_information

# Proof-of-concept smoke check against the value expected from the sample PDF.
assert assessor_information.accreditation_number == "QUID211435", (
    f"unexpected accreditation number: {assessor_information.accreditation_number!r}"
)

init_db()

assessor1 = AssessorInfo(
    accreditation_number=assessor_information.accreditation_number,
    name=assessor_information.name,
    phone_number=assessor_information.phone_number,
    email_address=assessor_information.email_address,
)

# The session is used as a context manager so it is closed on exit.
with get_db_session() as session:
    session.add(assessor1)
    session.commit()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# Download a PDF file and keep it in git to make processing and tests easier
|
||||
# scrape the correct data
|
||||
# Pydantic structure
|
||||
# Load to database successfully locally
|
||||
|
|
@ -1,12 +1,36 @@
|
|||
from sqlmodel import Field, SQLModel, create_engine, Relationship
|
||||
from uuid import uuid4
|
||||
from sqlmodel import Field, SQLModel, create_engine, Relationship, Session
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from pydantic import field_validator, EmailStr
|
||||
from pydantic_settings import BaseSettings
|
||||
from typing import Optional, List
|
||||
from sqlalchemy import Column
|
||||
from sqlalchemy.dialects.postgresql import UUID
|
||||
|
||||
class Settings(BaseSettings):
    """Application settings read from the environment and an optional ``.env`` file."""

    # Database connection URL; stays None when it is not provided.
    DATABASE_URL: Optional[str] = None

    # pydantic v2 deprecates the nested ``class Config``; ``model_config`` is the
    # supported replacement. A plain dict is used so no extra import is required.
    model_config = {"env_file": ".env"}
|
||||
|
||||
# Load the configuration once, at import time.
settings = Settings()

# Build an engine only when a connection URL is present; otherwise leave it as
# None so the helpers in this module can detect the missing configuration.
if settings.DATABASE_URL:
    engine = create_engine(settings.DATABASE_URL)
else:
    engine = None
|
||||
|
||||
|
||||
def get_db_session():
    """Return a new ``Session`` bound to the module-level engine.

    Raises:
        RuntimeError: if no engine exists because DATABASE_URL was not set.
    """
    if engine is not None:
        return Session(engine)
    raise RuntimeError("Database is not configured. Set DATABASE_URL in environment variables.")
|
||||
|
||||
def init_db():
    """Create all tables registered on ``SQLModel.metadata``.

    Does nothing when no engine is configured.
    """
    if not engine:
        return
    SQLModel.metadata.create_all(engine)
|
||||
|
||||
class BaseModel(SQLModel):
    """Shared base for the models in this module; contributes a UUID primary key."""

    # Declared through Field(primary_key=True) rather than a prebuilt
    # sa_column=Column(...): one Column instance on a base class would be shared
    # by every table model that inherits from it, and SQLAlchemy does not allow
    # a single Column object to be attached to more than one Table.
    id: uuid.UUID = Field(default_factory=uuid.uuid4, primary_key=True)
|
||||
|
||||
class Dimension(BaseModel):
|
||||
floor_area_m2: float
|
||||
|
|
@ -75,7 +99,7 @@ class Door(BaseModel):
|
|||
no_of_insulated_doors: int
|
||||
u_value_w_m2_k: Optional[str]
|
||||
|
||||
class AssessorInfo(BaseModel):
|
||||
class AssessorInfo(BaseModel, table=True):
|
||||
accreditation_number: str
|
||||
name: str
|
||||
phone_number: Optional[str] = None
|
||||
|
|
|
|||
BIN
example_data/pre_site_note.pdf
Normal file
BIN
example_data/pre_site_note.pdf
Normal file
Binary file not shown.
38
poetry.lock
generated
38
poetry.lock
generated
|
|
@ -1206,6 +1206,27 @@ files = [
|
|||
[package.dependencies]
|
||||
typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0"
|
||||
|
||||
[[package]]
|
||||
name = "pydantic-settings"
|
||||
version = "2.8.1"
|
||||
description = "Settings management using Pydantic"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "pydantic_settings-2.8.1-py3-none-any.whl", hash = "sha256:81942d5ac3d905f7f3ee1a70df5dfb62d5569c12f51a5a647defc1c3d9ee2e9c"},
|
||||
{file = "pydantic_settings-2.8.1.tar.gz", hash = "sha256:d5c663dfbe9db9d5e1c646b2e161da12f0d734d422ee56f567d0ea2cee4e8585"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
pydantic = ">=2.7.0"
|
||||
python-dotenv = ">=0.21.0"
|
||||
|
||||
[package.extras]
|
||||
azure-key-vault = ["azure-identity (>=1.16.0)", "azure-keyvault-secrets (>=4.8.0)"]
|
||||
toml = ["tomli (>=2.0.1)"]
|
||||
yaml = ["pyyaml (>=6.0.1)"]
|
||||
|
||||
[[package]]
|
||||
name = "pygments"
|
||||
version = "2.19.1"
|
||||
|
|
@ -1275,6 +1296,21 @@ files = [
|
|||
[package.dependencies]
|
||||
six = ">=1.5"
|
||||
|
||||
[[package]]
|
||||
name = "python-dotenv"
|
||||
version = "1.1.0"
|
||||
description = "Read key-value pairs from a .env file and set them as environment variables"
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "python_dotenv-1.1.0-py3-none-any.whl", hash = "sha256:d7c01d9e2293916c18baf562d95698754b0dbbb5e74d457c45d4f6561fb9d55d"},
|
||||
{file = "python_dotenv-1.1.0.tar.gz", hash = "sha256:41f90bc6f5f177fb41f53e87666db362025010eb28f60a01c9143bfa33a2b2d5"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
cli = ["click (>=5.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "pytz"
|
||||
version = "2025.1"
|
||||
|
|
@ -1698,4 +1734,4 @@ files = [
|
|||
[metadata]
|
||||
lock-version = "2.1"
|
||||
python-versions = ">=3.12"
|
||||
content-hash = "8d7db7d1395e8a16703b9b23de79f9d0e53307dea379e229a26081bc301aaf93"
|
||||
content-hash = "f49b1428e8390de0d5989fcd7319d0f1e54d29127e967a3a7d9c07acd74e6d39"
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ dependencies = [
|
|||
"fuzzywuzzy (>=0.18.0,<0.19.0)",
|
||||
"sqlmodel (>=0.0.24,<0.0.25)",
|
||||
"psycopg2 (>=2.9.10,<3.0.0)",
|
||||
"pydantic-settings (>=2.8.1,<3.0.0)",
|
||||
]
|
||||
|
||||
[tool.poetry]
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue