mirror of
https://github.com/Hestia-Homes/survey-extraction.git
synced 2026-06-08 11:17:29 +00:00
Merge pull request #78 from Hestia-Homes/feature/document_upload
Feature/document upload
This commit is contained in:
commit
6d28e2cd93
13 changed files with 440 additions and 122 deletions
8
.db-env
8
.db-env
|
|
@ -1,10 +1,10 @@
|
|||
POSTGRES_USER=postgres
|
||||
OSTGRES_USER=postgres
|
||||
PGDATABASE=surveyDB
|
||||
POSTGRES_PASSWORD=makingwarmhomes
|
||||
POSTGRES_HOST=localhost
|
||||
POSTGRES_PORT=5432
|
||||
PGADMIN_DEFAULT_EMAIL=junte@domna.homes
|
||||
PGADMIN_DEFAULT_PASSWORD=makingwarmhomes
|
||||
DATABASE_URL=postgresql://postgres:makingwarmhomes@db:5432/postgres
|
||||
# Prod(dev-aws) Database Don't use!!!!
|
||||
#DATABASE_URL=postgresql://postgres:makingwarmhomes@terraform-20250331175522503500000002.cdgzupxvdyp0.eu-west-2.rds.amazonaws.com:5432/surveyDB
|
||||
# DATABASE_URL=postgresql://postgres:makingwarmhomes@db:5432/postgres
|
||||
# Prod(dev-aws) Database Don't use
|
||||
DATABASE_URL=postgresql://postgres:makingwarmhomes@terraform-20250331175522503500000002.cdgzupxvdyp0.eu-west-2.rds.amazonaws.com:5432/surveyDB
|
||||
2
.github/workflows/lambda_main.yml
vendored
2
.github/workflows/lambda_main.yml
vendored
|
|
@ -2,7 +2,7 @@ name: Lambda Main Workflow
|
|||
|
||||
on:
|
||||
push:
|
||||
branches: [main, feature/seperate_terraform_with_different_states]
|
||||
branches: [main, feature/document_upload]
|
||||
|
||||
env:
|
||||
AWS_REGION: eu-west-2
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ from sqlmodel import SQLModel
|
|||
from etl.models.topLevel import *
|
||||
from etl.models.preSiteNoteTypes import *
|
||||
from etl.models.conditionReport import *
|
||||
from etl.fileReader.reportType import ReportType
|
||||
|
||||
import os
|
||||
|
||||
|
|
|
|||
34
alembic/versions/29113d69989e_enum_things.py
Normal file
34
alembic/versions/29113d69989e_enum_things.py
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
"""enum things
|
||||
|
||||
Revision ID: 29113d69989e
|
||||
Revises: 2cf02c9f71f8
|
||||
Create Date: 2025-08-19 11:40:52.712131
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = '29113d69989e'
|
||||
down_revision: Union[str, None] = '2cf02c9f71f8'
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade():
|
||||
op.execute("ALTER TYPE reporttype ADD VALUE IF NOT EXISTS 'osmosis_condition_pas_2035_report'")
|
||||
op.execute("ALTER TYPE reporttype ADD VALUE IF NOT EXISTS 'warm_homes_condition_pas_2035_report'")
|
||||
op.execute("ALTER TYPE reporttype ADD VALUE IF NOT EXISTS 'energy_performance_report_with_data'")
|
||||
op.execute("ALTER TYPE reporttype ADD VALUE IF NOT EXISTS 'energy_performance_report_summary_information'")
|
||||
op.execute("ALTER TYPE reporttype ADD VALUE IF NOT EXISTS 'lodgement_xml_needed_for_lodgement_to_like_trademark'")
|
||||
op.execute("ALTER TYPE reporttype ADD VALUE IF NOT EXISTS 'reduce_xml_needed_to_generate_full_sap_xml'")
|
||||
op.execute("ALTER TYPE reporttype ADD VALUE IF NOT EXISTS 'full_xml_needed_for_co_ordination'")
|
||||
|
||||
def downgrade() -> None:
|
||||
"""Downgrade schema."""
|
||||
# ### commands auto generated by Alembic - please adjust! ###
|
||||
pass
|
||||
# ### end Alembic commands ###
|
||||
41
alembic/versions/2cf02c9f71f8_add_missing_report_type.py
Normal file
41
alembic/versions/2cf02c9f71f8_add_missing_report_type.py
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
"""add missing report type
|
||||
|
||||
Revision ID: 2cf02c9f71f8
|
||||
Revises: 253a1047c623
|
||||
Create Date: 2025-08-19 11:36:16.006276
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects import postgresql
|
||||
from sqlalchemy.dialects import postgresql as psql
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = '2cf02c9f71f8'
|
||||
down_revision: Union[str, None] = '253a1047c623'
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
op.alter_column(
|
||||
"uploaded_files",
|
||||
"s3_json_uri",
|
||||
type_=psql.JSON(), # or psql.JSONB()
|
||||
postgresql_using="s3_json_uri::json", # or ::jsonb
|
||||
existing_type=sa.VARCHAR(),
|
||||
existing_nullable=True,
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
"""Downgrade schema."""
|
||||
# ### commands auto generated by Alembic - please adjust! ###
|
||||
op.alter_column('uploaded_files', 's3_json_uri',
|
||||
existing_type=postgresql.JSON(astext_type=sa.Text()),
|
||||
type_=sa.VARCHAR(),
|
||||
existing_nullable=True)
|
||||
# ### end Alembic commands ###
|
||||
38
alembic/versions/a8cc4a5fccb6_json_uri_is_a_string.py
Normal file
38
alembic/versions/a8cc4a5fccb6_json_uri_is_a_string.py
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
"""json_uri is a string
|
||||
|
||||
Revision ID: a8cc4a5fccb6
|
||||
Revises: 29113d69989e
|
||||
Create Date: 2025-08-19 12:35:59.456912
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = 'a8cc4a5fccb6'
|
||||
down_revision: Union[str, None] = '29113d69989e'
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
"""Upgrade schema."""
|
||||
# ### commands auto generated by Alembic - please adjust! ###
|
||||
op.alter_column('uploaded_files', 's3_json_uri',
|
||||
existing_type=postgresql.JSON(astext_type=sa.Text()),
|
||||
type_=sa.Text(),
|
||||
existing_nullable=True)
|
||||
# ### end Alembic commands ###
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
"""Downgrade schema."""
|
||||
# ### commands auto generated by Alembic - please adjust! ###
|
||||
op.alter_column('uploaded_files', 's3_json_uri',
|
||||
existing_type=sa.Text(),
|
||||
type_=postgresql.JSON(astext_type=sa.Text()),
|
||||
existing_nullable=True)
|
||||
# ### end Alembic commands ###
|
||||
|
|
@ -1,26 +1,220 @@
|
|||
"""
|
||||
A quick example of lambda working a function in python
|
||||
"""
|
||||
from etl.read_stuff_from_s3_example import print_hello_from_etl_module
|
||||
import os
|
||||
import tempfile
|
||||
import requests
|
||||
import boto3
|
||||
from urllib.parse import urlparse
|
||||
from etl.fileReader.pdfReaderToText import pdfReaderToText
|
||||
from etl.fileReader.sitenotes import (
|
||||
SiteNotesExtractor,
|
||||
WarmHomesConditionReport
|
||||
)
|
||||
|
||||
from uuid import UUID
|
||||
import json
|
||||
from typing import Any
|
||||
from etl.db.db import get_db_session, init_db
|
||||
from typing import Union
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
from sqlmodel import select
|
||||
from sqlalchemy import update
|
||||
from etl.models.topLevel import uploaded_files
|
||||
|
||||
def update_uploaded_file_json_uri_by_query(
|
||||
db_session,
|
||||
file_id: Union[str, uuid.UUID],
|
||||
json_uri: str,
|
||||
):
|
||||
"""
|
||||
Query uploaded_files by id, update s3_json_uri and s3_json_upload_timestamp,
|
||||
commit, refresh, and return the ORM object. Raises ValueError if not found.
|
||||
"""
|
||||
try:
|
||||
file_id_norm = uuid.UUID(str(file_id))
|
||||
except (ValueError, AttributeError, TypeError):
|
||||
file_id_norm = file_id # leave as-is if not a UUID
|
||||
|
||||
obj = (
|
||||
db_session
|
||||
.query(uploaded_files)
|
||||
.filter(uploaded_files.id == file_id_norm)
|
||||
.first()
|
||||
)
|
||||
obj.s3_json_uri = json_uri
|
||||
obj.s3_json_upload_timestamp = datetime.now(timezone.utc)
|
||||
|
||||
db_session.add(obj)
|
||||
db_session.commit()
|
||||
db_session.refresh(obj)
|
||||
return obj
|
||||
|
||||
def serialize_model(model: Any):
|
||||
"""Recursively convert Pydantic models/lists into plain dicts."""
|
||||
if hasattr(model, "dict"):
|
||||
return {k: serialize_model(v) for k, v in model.dict().items()}
|
||||
elif isinstance(model, list):
|
||||
return [serialize_model(item) for item in model]
|
||||
else:
|
||||
return model
|
||||
|
||||
def make_final_json(rooms_obj, heating_system_obj):
|
||||
# Convert to dict recursively
|
||||
rooms_data = serialize_model(rooms_obj)
|
||||
heating_data = serialize_model(heating_system_obj)
|
||||
|
||||
# Combine into one big JSON-ready dict
|
||||
final_data = {
|
||||
"rooms": rooms_data,
|
||||
"heating_system": heating_data
|
||||
}
|
||||
|
||||
# Convert to pretty JSON string
|
||||
return final_data
|
||||
|
||||
def parse_s3_uri(uri: str):
|
||||
"""
|
||||
Parse an S3 URI or HTTPS S3 URL into bucket and key.
|
||||
Supports formats:
|
||||
- s3://bucket-name/path/to/file
|
||||
- https://bucket-name.s3.region.amazonaws.com/path/to/file
|
||||
"""
|
||||
parsed = urlparse(uri)
|
||||
|
||||
if parsed.scheme == "s3":
|
||||
# s3://bucket/key
|
||||
bucket = parsed.netloc
|
||||
key = parsed.path.lstrip("/")
|
||||
elif parsed.scheme in ("http", "https"):
|
||||
# https://bucket-name.s3.region.amazonaws.com/key
|
||||
host_parts = parsed.netloc.split(".")
|
||||
if len(host_parts) >= 3 and host_parts[1] == "s3":
|
||||
bucket = host_parts[0]
|
||||
else:
|
||||
raise ValueError("Not a valid S3 HTTPS URL format")
|
||||
key = parsed.path.lstrip("/")
|
||||
else:
|
||||
raise ValueError("Unsupported URI scheme")
|
||||
|
||||
return bucket, key
|
||||
|
||||
|
||||
def download_private_s3_file(uri) -> str:
|
||||
bucket_name, key = parse_s3_uri(uri)
|
||||
"""
|
||||
Download a private S3 file using hardcoded AWS credentials.
|
||||
Saves it to /tmp and returns the local file path.
|
||||
"""
|
||||
|
||||
# Hardcoded AWS credentials (quick testing only)
|
||||
aws_access_key = "AKIAU5A36PPNJMZZ3KRW"
|
||||
aws_secret_key = "Pr5uxwh1zOCocKuFDA4DWQX039t0h2mnM7kaxlSt"
|
||||
aws_region = "eu-west-2"
|
||||
|
||||
# Where to store the file locally
|
||||
tmp_dir = tempfile.gettempdir()
|
||||
filename = os.path.basename(key)
|
||||
file_path = os.path.join(tmp_dir, filename)
|
||||
|
||||
# Create S3 client with hardcoded creds
|
||||
s3 = boto3.client(
|
||||
"s3",
|
||||
aws_access_key_id=aws_access_key,
|
||||
aws_secret_access_key=aws_secret_key,
|
||||
region_name=aws_region
|
||||
)
|
||||
|
||||
# Download file
|
||||
s3.download_file(bucket_name, key, file_path)
|
||||
|
||||
return file_path
|
||||
|
||||
def upload_json_to_s3(json_obj, dest_uri: str) -> str:
|
||||
"""
|
||||
Upload a JSON-serializable object to S3 at the given s3:// or https S3 URL.
|
||||
Returns the public-style HTTPS S3 URL (still private if bucket is private).
|
||||
"""
|
||||
bucket, pdf_key = parse_s3_uri(dest_uri)
|
||||
base_folder = os.path.dirname(pdf_key) # e.g. ".../report"
|
||||
|
||||
# Build jsonified folder + timestamp filename
|
||||
timestamp = datetime.utcnow().strftime("%Y%m%d_%H%M%S")
|
||||
json_key = f"{base_folder}/jsonified/{timestamp}.json"
|
||||
|
||||
# Same region/creds you used for download
|
||||
aws_access_key = "AKIAU5A36PPNJMZZ3KRW"
|
||||
aws_secret_key = "Pr5uxwh1zOCocKuFDA4DWQX039t0h2mnM7kaxlSt"
|
||||
aws_region = "eu-west-2"
|
||||
|
||||
s3 = boto3.client(
|
||||
"s3",
|
||||
aws_access_key_id=aws_access_key,
|
||||
aws_secret_access_key=aws_secret_key,
|
||||
region_name=aws_region
|
||||
)
|
||||
|
||||
body = json.dumps(json_obj, ensure_ascii=False, indent=2).encode("utf-8")
|
||||
|
||||
s3.put_object(
|
||||
Bucket=bucket,
|
||||
Key=json_key,
|
||||
Body=body,
|
||||
ContentType="application/json"
|
||||
# Optional hardening:
|
||||
# , ServerSideEncryption="AES256"
|
||||
)
|
||||
|
||||
# Return an HTTPS-style S3 URL (matches your input style)
|
||||
return f"https://{bucket}.s3.{aws_region}.amazonaws.com/{json_key}"
|
||||
|
||||
def get_file_uri(id):
|
||||
with get_db_session() as session:
|
||||
obj = (
|
||||
session
|
||||
.query(uploaded_files)
|
||||
.filter(uploaded_files.id == id)
|
||||
.first()
|
||||
)
|
||||
if obj is None:
|
||||
raise RuntimeError(f"Failed to find uploaded_files record with id {id}")
|
||||
|
||||
return obj.s3_file_uri
|
||||
|
||||
|
||||
def handler(event, context):
|
||||
print("Outside try statment")
|
||||
print_hello_from_etl_module()
|
||||
try:
|
||||
print("show me something.. anything...")
|
||||
s3_uri = event.get("file_location")
|
||||
if not s3_uri:
|
||||
print("failed to get s3_uri")
|
||||
return {
|
||||
"statusCode": 400,
|
||||
"body": "Missing 'file_location' in event"
|
||||
}
|
||||
print(f"s3 uri is {s3_uri}")
|
||||
print("trying to connect to db")
|
||||
init_db()
|
||||
print("connected to db")
|
||||
for r in event.get("Records", []):
|
||||
body = json.loads(r["body"])
|
||||
id_ = body.get("id")
|
||||
if not id_: # covers None or empty string
|
||||
raise ValueError(f"❌ Missing 'id' in SQS body: {body}")
|
||||
|
||||
return {
|
||||
"statusCode": 200,
|
||||
"body": f"s3 uri {s3_uri}"
|
||||
}
|
||||
print(f"Retrieving file uri with id {id_}")
|
||||
file_uri = get_file_uri(id_)
|
||||
print(f"Retrieved file uri with {file_uri}")
|
||||
|
||||
print("Downloading file locally for extraction...")
|
||||
local_path = download_private_s3_file(file_uri)
|
||||
|
||||
print("Extracting file...")
|
||||
reader = pdfReaderToText(local_path)
|
||||
obj = WarmHomesConditionReport(reader.text_list)
|
||||
json_ = make_final_json(obj.master_obj[0], obj.master_obj[1])
|
||||
print("Extracted completed, made json")
|
||||
|
||||
print("uploading json to s3 bucket...")
|
||||
json_uri = upload_json_to_s3(json_, file_uri)
|
||||
|
||||
print("Updating Database with json_uri")
|
||||
with get_db_session() as session:
|
||||
update_uploaded_file_json_uri_by_query(
|
||||
session,
|
||||
id_,
|
||||
json_uri,
|
||||
)
|
||||
print("job completed successfully")
|
||||
|
||||
except Exception as e:
|
||||
print(f"❌ Error: {e}")
|
||||
|
|
|
|||
|
|
@ -59,7 +59,17 @@ resource "aws_lambda_function" "extractor_and_loader" {
|
|||
role = data.aws_iam_role.lambda_exec_role.arn
|
||||
package_type = "Image"
|
||||
image_uri = "${data.aws_ecr_repository.extractor_and_loader.repository_url}:${var.lambda_image_tag}"
|
||||
timeout = 30
|
||||
# Increase timeout (max 900 sec / 15 min)
|
||||
timeout = 300 # e.g. 5 minutes
|
||||
|
||||
# Increase memory (default 128 MB)
|
||||
memory_size = 2048 # try 1024 or 2048 MB to start
|
||||
|
||||
environment {
|
||||
variables = {
|
||||
DATABASE_URL = "postgresql://postgres:makingwarmhomes@terraform-20250331175522503500000002.cdgzupxvdyp0.eu-west-2.rds.amazonaws.com:5432/surveyDB"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# SQS trigger
|
||||
|
|
|
|||
|
|
@ -105,32 +105,24 @@ class WarmHomesConditionReport(SiteNotesExtractor):
|
|||
def __init__(self, data_list):
|
||||
super().__init__(data_list)
|
||||
self.type = ReportType.WARM_HOMES_CONDITION_REPORT
|
||||
self.master_obj = self.setup_condition_report()
|
||||
room, heating_system = self.setup_condition_report()
|
||||
self.master_obj = room, heating_system
|
||||
|
||||
def setup_condition_report(self):
|
||||
general_information = self.get_section_1()
|
||||
access_and_elevations = self.get_section_2()
|
||||
# general_information = self.get_section_1()
|
||||
# access_and_elevations = self.get_section_2()
|
||||
rooms = self.get_section_3()
|
||||
heating_system = self.get_section_4()
|
||||
occupant_assessment = self.get_section_5()
|
||||
site_name, reference_code, address, postcode = self.get_section_0()
|
||||
# occupant_assessment = self.get_section_5()
|
||||
# site_name, reference_code, address, postcode = self.get_section_0()
|
||||
|
||||
return ConditionReportModel(
|
||||
project_site_name=site_name,
|
||||
property_reference_code=reference_code,
|
||||
property_address=address,
|
||||
postcode=postcode,
|
||||
general_information=general_information,
|
||||
access_and_elevations=access_and_elevations,
|
||||
rooms=rooms,
|
||||
heating_system=heating_system,
|
||||
occupancy_assessment=occupant_assessment,
|
||||
)
|
||||
return rooms, heating_system
|
||||
|
||||
def get_section_0(self):
|
||||
data = self.get_data_between("Project Site Name", "1. General Information")
|
||||
site_name = self.get_next_value(data, "Project Site Name")
|
||||
reference_code = self.get_next_value(data, "Property Reference Code")
|
||||
# reference_code = self.get_next_value(data, "Property Reference Code")
|
||||
reference_code = "Place holder, please delete me for prod"
|
||||
address = self.get_next_value(data, "Property Address")
|
||||
postcode = self.get_data_between("Postcode", "Main Image")[1:]
|
||||
postcode = " ".join(postcode)
|
||||
|
|
@ -155,7 +147,8 @@ class WarmHomesConditionReport(SiteNotesExtractor):
|
|||
def get_assessor_details(self):
|
||||
data = self.get_data_between("1.1 Assessor details","1.2 Inspection & Project")
|
||||
assessor_name = self.get_next_value(data, "Assessor Name & ID")
|
||||
elmhirst_id = self.get_next_value(data, "Please enter name & Elmhurst ID")
|
||||
# elmhirst_id = self.get_next_value(data, "Property Reference Code")
|
||||
elmhirst_id = "please remove me in prod"
|
||||
return AssessorDetails(assessor_name_and_id=assessor_name, elmhurst_id=elmhirst_id)
|
||||
|
||||
def get_inspection_and_project(self):
|
||||
|
|
@ -164,7 +157,7 @@ class WarmHomesConditionReport(SiteNotesExtractor):
|
|||
return InspectionAndProject(inspection_date=date)
|
||||
|
||||
def get_the_property(self):
|
||||
data = self.get_data_between("1.3 The property", "Main elevation")
|
||||
data = self.get_data_between("1.3 The property", "3. Rooms")
|
||||
return TheProperty(
|
||||
house_type=self.get_next_value(data, "House Type"),
|
||||
on_which_floor_is_the_flat_located = self.get_next_value(data, "On which floor is the flat located?"),
|
||||
|
|
@ -356,7 +349,7 @@ class WarmHomesConditionReport(SiteNotesExtractor):
|
|||
ventilation = VentilationInfo(
|
||||
is_there_a_ventilation_system_present_in_the_room=True if self.get_next_value(data, "Is there a ventilation system present in the room?").lower() == "yes" else False,
|
||||
are_there_any_visible_or_reported_signs_of_damp_mould_or_excessive_condensation_within_the_room=True if self.get_next_value(data, "excessive condensation within the room?").lower() == "yes" else False,
|
||||
are_there_sufficient_undercuts_on_the_closed_door=self.get_next_value(data, "- min 7.6mm per 1000mm width door)?"),
|
||||
are_there_sufficient_undercuts_on_the_closed_door=self.get_next_value(data, "- min 10mm)?"),
|
||||
is_there_any_open_flue_heating_appliances_within_the_room=True if self.get_next_value(data, "Is there any open flue heating appliances within the room?").lower() == "yes" else False,
|
||||
)
|
||||
|
||||
|
|
@ -520,7 +513,7 @@ class WarmHomesConditionReport(SiteNotesExtractor):
|
|||
general_condition_of_heating_system = self.get_general_condition_of_heating_system()
|
||||
main_heating_one = self.get_main_heating_one()
|
||||
main_heating_two = self.get_main_heating_two()
|
||||
secondary_heating = self.get_secondary_heating()
|
||||
# secondary_heating = self.get_secondary_heating()
|
||||
heating_by_room = self.get_heating_by_room()
|
||||
renewables = self.get_renewables()
|
||||
|
||||
|
|
@ -528,7 +521,7 @@ class WarmHomesConditionReport(SiteNotesExtractor):
|
|||
general_condition=general_condition_of_heating_system,
|
||||
main_heating_one=main_heating_one,
|
||||
main_heating_two=main_heating_two,
|
||||
secondary_heating=secondary_heating,
|
||||
# secondary_heating=secondary_heating,
|
||||
heating_by_room=heating_by_room,
|
||||
renewables=renewables
|
||||
)
|
||||
|
|
@ -547,19 +540,19 @@ class WarmHomesConditionReport(SiteNotesExtractor):
|
|||
is_there_a_main_heating_two=True if self.get_next_value(data, "Is there a Main Heating 2?").lower() == "yes" else False,
|
||||
)
|
||||
|
||||
def get_secondary_heating(self):
|
||||
data = self.get_data_between("Secondary Heating", "Heating by room")
|
||||
return SecondaryHeating(
|
||||
is_there_a_secondary_heating=True if self.get_next_value(data, "Is there a Secondary Heating?").lower() == "yes" else False,
|
||||
fuel=self.get_next_value(data, "Fuel"),
|
||||
electric_heating_type=self.get_next_value_greedy(data, "Type", 2),
|
||||
gas_heating_type=self.get_x_value(data, "Type", 1),
|
||||
)
|
||||
# def get_secondary_heating(self):
|
||||
# data = self.get_data_between("Secondary Heating", "Heating by room")
|
||||
# return SecondaryHeating(
|
||||
# is_there_a_secondary_heating=True if self.get_next_value(data, "Is there a Secondary Heating?").lower() == "yes" else False,
|
||||
# fuel=self.get_next_value(data, "Fuel:"),
|
||||
# electric_heating_type=self.get_next_value_greedy(data, "Type", 2),
|
||||
# gas_heating_type=self.get_x_value(data, "Type", 1),
|
||||
# )
|
||||
|
||||
def get_heating_by_room(self):
|
||||
data = self.get_data_between("Heating by room", "Renewables")
|
||||
list_of_main_heating_system_by_one = self.get_data_between("Rooms heated by Main System 1:", "Rooms Heated by Main System 2")[1:]
|
||||
list_of_main_heating_system_by_two = self.get_data_between("Rooms Heated by Main System 2", "Rooms heated by Secondary Heating:")[1:]
|
||||
list_of_main_heating_system_by_one = self.get_data_between("Rooms heated by Main System 1:", "Rooms heated by Main System 2:")[1:]
|
||||
list_of_main_heating_system_by_two = self.get_data_between("Rooms heated by Main System 2:", "Rooms heated by Secondary Heating:")[1:]
|
||||
list_of_rooms_heated_by_secondary_heating = self.get_data_between("Rooms heated by Secondary Heating:", "Are there any partially heated rooms?")[1:]
|
||||
are_there_any_partially_heated_rooms = True if self.get_next_value(data, "Are there any partially heated rooms?").lower() == "yes" else False
|
||||
are_there_any_unheated_rooms = True if self.get_next_value(data, "Are there any unheated rooms?").lower() == "yes" else False
|
||||
|
|
@ -606,8 +599,9 @@ class WarmHomesConditionReport(SiteNotesExtractor):
|
|||
is_the_heating_system_in_working_order=True if self.get_next_value(data, "Is the Heating System in working order?") else False,
|
||||
does_the_occupant_have_a_smart_meter=True if self.get_next_value(data, "Does the occupant have a Smart Meter?") else False,
|
||||
are_there_any_smart_monitoring_devices=True if self.get_next_value(data, "Are there any smart monitoring devices (Switchee etc)?") else False,
|
||||
are_the_gas_and_electricity_meters_accessible=True if self.get_next_value(data, "Are the Gas and Electricity Meters accessible?") else False,
|
||||
dual_or_single_electric_meter=self.get_next_value(data, "Dual or single electric meter?"),
|
||||
are_the_electricity_meters_accessible=True if self.get_next_value(data, "Is the Electricity Meter accessible?") else False,
|
||||
are_the_gas_meters_accessible=True if self.get_next_value(data, "Is the Gas Meter accessible?") else False,
|
||||
# dual_or_single_electric_meter=self.get_next_value(data, "Dual or single electric meter?"),
|
||||
)
|
||||
|
||||
def get_section_5(self):
|
||||
|
|
@ -1373,62 +1367,62 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor):
|
|||
)
|
||||
)
|
||||
|
||||
def get_secondary_heating(self):
|
||||
data = self.raw_data[self.raw_data.index("14.1 Main Heating2"):self.raw_data.index("14.2 Secondary Heating Type")]
|
||||
main_titles = [
|
||||
"Second Main Heating Type",
|
||||
"Main Heating System Controls",
|
||||
]
|
||||
sub_titles = [
|
||||
"Percentage of Heated Floor Area Served (%)",
|
||||
"Heating Source",
|
||||
"Efficiency Source",
|
||||
"Heating Fuel",
|
||||
"SAP 2009 Table 4a/4b",
|
||||
"Heating Type",
|
||||
"Heating Description",
|
||||
"Control Type",
|
||||
"Flue Type",
|
||||
"Fan Assisted Flue",
|
||||
"Heat Emitter Type",
|
||||
]
|
||||
list = self.two_column_with_extension_processor(data, sub_titles, main_titles)
|
||||
dict_ = list[0]
|
||||
# def get_secondary_heating(self):
|
||||
# data = self.raw_data[self.raw_data.index("14.1 Main Heating2"):self.raw_data.index("14.2 Secondary Heating Type")]
|
||||
# main_titles = [
|
||||
# "Second Main Heating Type",
|
||||
# "Main Heating System Controls",
|
||||
# ]
|
||||
# sub_titles = [
|
||||
# "Percentage of Heated Floor Area Served (%)",
|
||||
# "Heating Source",
|
||||
# "Efficiency Source",
|
||||
# "Heating Fuel",
|
||||
# "SAP 2009 Table 4a/4b",
|
||||
# "Heating Type",
|
||||
# "Heating Description",
|
||||
# "Control Type",
|
||||
# "Flue Type",
|
||||
# "Fan Assisted Flue",
|
||||
# "Heat Emitter Type",
|
||||
# ]
|
||||
# list = self.two_column_with_extension_processor(data, sub_titles, main_titles)
|
||||
# dict_ = list[0]
|
||||
|
||||
return Heating(
|
||||
type="secondary",
|
||||
percentage_of_heated_floor_area_served=dict_.get("percentage_of_heated_floor_area_served_(%)", ""),
|
||||
heating_source=dict_.get("heating_source", "") if dict_["heating_source"] is not None else "",
|
||||
efficiency_source=dict_.get("efficiency_source", "") if dict_["efficiency_source"] is not None else "",
|
||||
heating_fuel=dict_.get("heating_fuel", "") if dict_.get("heating_fuel") is not None else "",
|
||||
brand_name=dict_.get("brand_name", ""),
|
||||
model_name=dict_.get("model_name", ""),
|
||||
model_qualifer=dict_.get("model_qualifier", ""),
|
||||
controls=HeatingSystemControls(
|
||||
control_type=dict_.get("control_type", "") if dict_.get("control_type", "") is not None else "",
|
||||
flue_type=dict_.get("flue_type","") if dict_.get("flue_type", "") is not None else "",
|
||||
fan_assisted_flue=True if dict_.get("fan_assisted_flue", "NO").upper() == "YES" else False,
|
||||
heat_emitter_type=dict_.get("heat_emitter_type", "") if dict_.get("heat_emitter_type") is not None else "",
|
||||
electricity_meter_type=dict_.get("electricity_meter_type", "") if dict_.get("electricity_meter_type", "") is not None else "",
|
||||
mains_gas_available=True if dict_.get("mains_gas_available", "NO").upper() == "YES" else False,
|
||||
)
|
||||
)
|
||||
# return Heating(
|
||||
# type="secondary",
|
||||
# percentage_of_heated_floor_area_served=dict_.get("percentage_of_heated_floor_area_served_(%)", ""),
|
||||
# heating_source=dict_.get("heating_source", "") if dict_["heating_source"] is not None else "",
|
||||
# efficiency_source=dict_.get("efficiency_source", "") if dict_["efficiency_source"] is not None else "",
|
||||
# heating_fuel=dict_.get("heating_fuel", "") if dict_.get("heating_fuel") is not None else "",
|
||||
# brand_name=dict_.get("brand_name", ""),
|
||||
# model_name=dict_.get("model_name", ""),
|
||||
# model_qualifer=dict_.get("model_qualifier", ""),
|
||||
# controls=HeatingSystemControls(
|
||||
# control_type=dict_.get("control_type", "") if dict_.get("control_type", "") is not None else "",
|
||||
# flue_type=dict_.get("flue_type","") if dict_.get("flue_type", "") is not None else "",
|
||||
# fan_assisted_flue=True if dict_.get("fan_assisted_flue", "NO").upper() == "YES" else False,
|
||||
# heat_emitter_type=dict_.get("heat_emitter_type", "") if dict_.get("heat_emitter_type") is not None else "",
|
||||
# electricity_meter_type=dict_.get("electricity_meter_type", "") if dict_.get("electricity_meter_type", "") is not None else "",
|
||||
# mains_gas_available=True if dict_.get("mains_gas_available", "NO").upper() == "YES" else False,
|
||||
# )
|
||||
# )
|
||||
|
||||
def get_secondary_heating_type(self):
|
||||
data = self.raw_data[self.raw_data.index("14.2 Secondary Heating Type"):self.raw_data.index("15.0 Water Heating")]
|
||||
avoid = [
|
||||
"14.2 Secondary Heating Type",
|
||||
"15.0 Water Heating",
|
||||
]
|
||||
sub_titles = [
|
||||
"Heating Type",
|
||||
"Fuel Type",
|
||||
]
|
||||
dict_ = self.two_columns_processor(data, sub_titles, avoid)
|
||||
return HeatingType(
|
||||
heating_type=dict_.get("heating_type", ""),
|
||||
fuel_type=dict_.get("fuel_type", ""),
|
||||
)
|
||||
# def get_secondary_heating_type(self):
|
||||
# data = self.raw_data[self.raw_data.index("14.2 Secondary Heating Type"):self.raw_data.index("15.0 Water Heating")]
|
||||
# avoid = [
|
||||
# "14.2 Secondary Heating Type",
|
||||
# "15.0 Water Heating",
|
||||
# ]
|
||||
# sub_titles = [
|
||||
# "Heating Type",
|
||||
# "Fuel Type",
|
||||
# ]
|
||||
# dict_ = self.two_columns_processor(data, sub_titles, avoid)
|
||||
# return HeatingType(
|
||||
# heating_type=dict_.get("heating_type", ""),
|
||||
# fuel_type=dict_.get("fuel_type", ""),
|
||||
# )
|
||||
|
||||
def get_water_heating(self):
|
||||
data = self.raw_data[self.raw_data.index("15.0 Water Heating"):self.raw_data.index("15.1 Hot Water Cylinder")]
|
||||
|
|
|
|||
|
|
@ -8,6 +8,8 @@ from sqlalchemy import Column
|
|||
from sqlalchemy.dialects.postgresql import UUID
|
||||
from etl.fileReader.reportType import ReportType
|
||||
from sqlalchemy import DateTime
|
||||
from sqlalchemy.dialects.postgresql import JSON
|
||||
from sqlalchemy import Text
|
||||
|
||||
class BaseModel(SQLModel):
|
||||
# Put primary_key=True in Column; don't pass primary_key to Field
|
||||
|
|
@ -56,7 +58,9 @@ class BaseModel(SQLModel):
|
|||
class uploaded_files(BaseModel, table=True):
|
||||
__tablename__ = "uploaded_files"
|
||||
|
||||
s3_json_uri: Optional[str] = None
|
||||
s3_json_uri: Optional[str] = Field(
|
||||
sa_column=Column(Text, nullable=True)
|
||||
)
|
||||
s3_file_uri: str = Field(index=True)
|
||||
|
||||
doc_type: ReportType = Field(
|
||||
|
|
|
|||
|
|
@ -169,8 +169,10 @@ class GeneralConditionHeatingSystem(BaseModel):
|
|||
is_the_heating_system_in_working_order: bool
|
||||
does_the_occupant_have_a_smart_meter: bool
|
||||
are_there_any_smart_monitoring_devices: bool
|
||||
are_the_gas_and_electricity_meters_accessible: bool
|
||||
dual_or_single_electric_meter: str
|
||||
are_the_gas_meters_accessible: bool
|
||||
are_the_electricity_meters_accessible: bool
|
||||
|
||||
# dual_or_single_electric_meter: str
|
||||
|
||||
class SecondaryHeating(BaseModel):
|
||||
is_there_a_secondary_heating: bool
|
||||
|
|
@ -210,7 +212,7 @@ class HeatingSystem(BaseModel):
|
|||
general_condition: GeneralConditionHeatingSystem
|
||||
main_heating_one: MainHeatingOne
|
||||
main_heating_two: MainHeatingTwo
|
||||
secondary_heating: SecondaryHeating
|
||||
# secondary_heating: SecondaryHeating
|
||||
heating_by_room: HeatingByRoom
|
||||
renewables: Renewables
|
||||
|
||||
|
|
|
|||
|
|
@ -14,17 +14,17 @@ class Logger:
|
|||
c_handler.setLevel(level)
|
||||
|
||||
# File handler
|
||||
f_handler = logging.FileHandler(log_file)
|
||||
f_handler.setLevel(level)
|
||||
# f_handler = logging.FileHandler(log_file)
|
||||
# f_handler.setLevel(level)
|
||||
|
||||
# Formatter
|
||||
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
|
||||
c_handler.setFormatter(formatter)
|
||||
f_handler.setFormatter(formatter)
|
||||
# f_handler.setFormatter(formatter)
|
||||
|
||||
# Add handlers to the logger
|
||||
self.logger.addHandler(c_handler)
|
||||
self.logger.addHandler(f_handler)
|
||||
# self.logger.addHandler(f_handler)
|
||||
|
||||
def get_logger(self):
|
||||
return self.logger
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
#poetry run alembic revision --autogenerate -m "update enum"
|
||||
#poetry run alembic revision --autogenerate -m "json_uri is a string"
|
||||
|
||||
poetry run alembic upgrade head
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue