diff --git a/.idea/Model.iml b/.idea/Model.iml index 4413bb06..ed9033de 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + diff --git a/.idea/misc.xml b/.idea/misc.xml index 6f308057..3ab974fc 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/backend/app/db/models/materials.py b/backend/app/db/models/materials.py index 812c1ebb..e191c5ee 100644 --- a/backend/app/db/models/materials.py +++ b/backend/app/db/models/materials.py @@ -19,6 +19,20 @@ class MaterialType(enum.Enum): flat_roof_insulation = "flat_roof_insulation" room_roof_insulation = "room_roof_insulation" + iwi_wall_demolition = "iwi_wall_demolition" + iwi_vapour_barrier = "iwi_vapour_barrier" + iwi_redecoration = "iwi_redecoration" + suspended_floor_demolition = "suspended_floor_demolition" + suspended_floor_redecoration = "suspended_floor_redecoration" + suspended_floor_vapour_barrier = "suspended_floor_vapour_barrier" + solid_floor_demolition = "solid_floor_demolition" + solid_floor_preparation = "solid_floor_preparation" + solid_floor_vapour_barrier = "solid_floor_vapour_barrier" + solid_floor_redecoration = "solid_floor_redecoration" + ewi_wall_demolition = "ewi_wall_demolition" + ewi_wall_preparation = "ewi_wall_preparation" + ewi_wall_redecoration = "ewi_wall_redecoration" + class DepthUnit(enum.Enum): mm = "mm" @@ -27,6 +41,7 @@ class DepthUnit(enum.Enum): class CostUnit(enum.Enum): gbp_sq_meter = "gbp_sq_meter" gbp_per_unit = "gbp_per_unit" + gbp_per_m2 = "gbp_per_m2" class RValueUnit(enum.Enum): @@ -41,9 +56,11 @@ class Material(Base): __tablename__ = 'material' id = Column(Integer, primary_key=True, autoincrement=True) - type = Column(Enum(MaterialType, values_callable=lambda x: [e.value for e in x]), nullable=False) + type = Column(Enum(MaterialType, values_callable=lambda x: [e.value for e in x], create_constraint=False), + nullable=False) + description = Column(String, nullable=False) - depths = Column(String) # You may want to use a 
specific JSON type depending on the database + depth = Column(String) # You may want to use a specific JSON type depending on the database depth_unit = Column(Enum(DepthUnit, values_callable=lambda x: [e.value for e in x]), nullable=False) cost = Column(String) cost_unit = Column(Enum(CostUnit, values_callable=lambda x: [e.value for e in x]), nullable=False) @@ -57,3 +74,11 @@ class Material(Base): link = Column(String) created_at = Column(TIMESTAMP, nullable=False, server_default=func.now()) is_active = Column(Boolean, nullable=False, default=True) + + prime_material_cost = Column(Float) + material_cost = Column(Float) + labour_cost = Column(Float) + labour_hours_per_unit = Column(Float) + plant_cost = Column(Float) + total_cost = Column(Float) + notes = Column(String) diff --git a/etl/costs/README.md b/etl/costs/README.md new file mode 100644 index 00000000..969a3173 --- /dev/null +++ b/etl/costs/README.md @@ -0,0 +1,35 @@ +### Costs ETL Application + +This is a simple application to push the materials costs data to the database. + +#### How to run + +Ensure you have a .env file in the base Model directory with the following variables + +``` +DB_HOST="Your db host" +DB_PORT="Your db port" +DB_USERNAME="Your db user" +DB_PASSWORD="Your db password" +DB_NAME="Your db name" +``` + +Make sure your python path environment variable points to the base Model directory. 
To set the +`PYTHONPATH` environment variable, run the following command from the base Model directory + +``` +export PYTHONPATH=`pwd` +``` + +From the base Model directory, install the requirements by running the following command + +``` +pip install -r etl/costs/requirements.txt +``` + +Then run the following command to run the application + +``` +python etl/costs/app.py +``` + diff --git a/etl/costs/app.py b/etl/costs/app.py new file mode 100644 index 00000000..0117a66e --- /dev/null +++ b/etl/costs/app.py @@ -0,0 +1,98 @@ +import os +import dotenv +import json +import pandas as pd +import numpy as np +from pathlib import Path +from sqlalchemy.orm import Session +from sqlalchemy import create_engine +from backend.app.db.models.materials import Material + +DATA_DIRECTORY = Path(__file__).parent / "local_data" / "Hestia Materials.xlsx" +# Environment file is at the same level as this file +ENV_FILE = Path(__file__).parent / "etl" / "costs" / ".env" +dotenv.load_dotenv(ENV_FILE) + +DB_USERNAME = os.getenv('DB_USERNAME') +DB_PASSWORD = os.getenv('DB_PASSWORD') +DB_HOST = os.getenv('DB_HOST') +DB_PORT = os.getenv('DB_PORT') +DB_NAME = os.getenv('DB_NAME') + + +def push_costs_to_db(engine, costs_df): + """ + Push costs DataFrame to the database. + + :param engine: The SQLAlchemy engine connected to your database. + :param costs_df: The DataFrame containing cost data. + """ + materials = [] + + for _, row in costs_df.iterrows(): + row_dict = row.to_dict() + + # Add other necessary transformations here + + # Create Material object and add it to the list + materials.append(Material(**row_dict)) + + # Use SQLAlchemy session for bulk insert + with Session(engine) as session: + session.bulk_save_objects(materials) + session.commit() + + +def app(): + """ + This application uploads the cost data to our database + + The most recent cost data can be found in OneDrive, in the + shared folder > 04. 
Product Development > Cost data > Hestia Materials.xlsx + + For the moment, the data is uploaded manually. In the future, we will automate this so the data can be + stored locally and then is uploaded from the local_data folder + :return: + """ + + connection_string = "postgresql+{drivername}://{username}:{password}@{server}:{port}/{dbname}" + db_string = connection_string.format( + drivername="psycopg2", # You'll need to use psycopg2 driver for PostgreSQL + username=DB_USERNAME, + password=DB_PASSWORD, + server=DB_HOST, + port=DB_PORT, + dbname=DB_NAME, + ) + + db_engine = create_engine(db_string, pool_size=5, max_overflow=5) + + cwi_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="cavity_wall_insulation", header=0) + loft_insulation_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="loft_insulation", header=0) + iwi_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="internal_wall_insulation", header=0) + suspended_floor_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="suspended_floor_insulation", header=0) + solid_floor_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="solid_floor_insulation", header=0) + ewi_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="external_wall_insulation", header=0) + + # Form a single table to be uploaded + costs = pd.concat( + [ + cwi_costs, + loft_insulation_costs, + iwi_costs, + suspended_floor_costs, + solid_floor_costs, + ewi_costs, + ] + ) + + costs = costs.replace({np.nan: None}) + costs["depth"] = costs["depth"].fillna(0) + costs["depth"] = costs["depth"].astype(str) + + # Push the costs to the database + push_costs_to_db(db_engine, costs) + + +if __name__ == "__main__": + app() diff --git a/etl/costs/requirements.txt b/etl/costs/requirements.txt new file mode 100644 index 00000000..7d6afa9e --- /dev/null +++ b/etl/costs/requirements.txt @@ -0,0 +1,5 @@ +pandas==1.5.3 +sqlalchemy==2.0.19 +python-dotenv +psycopg2-binary +openpyxl \ No newline at end of file