Created costs elt app

This commit is contained in:
Khalim Conn-Kowlessar 2023-11-23 19:17:58 +00:00
parent ee698c40b5
commit b8ae345076
6 changed files with 167 additions and 4 deletions

2
.idea/Model.iml generated
View file

@ -7,7 +7,7 @@
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
</content>
<orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
<orderEntry type="jdk" jdkName="Costs" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyNamespacePackagesService">

2
.idea/misc.xml generated
View file

@ -3,7 +3,7 @@
<component name="Black">
<option name="sdkName" value="Python 3.10 (backend)" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="Costs" project-jdk-type="Python SDK" />
<component name="PythonCompatibilityInspectionAdvertiser">
<option name="version" value="3" />
</component>

View file

@ -19,6 +19,20 @@ class MaterialType(enum.Enum):
flat_roof_insulation = "flat_roof_insulation"
room_roof_insulation = "room_roof_insulation"
iwi_wall_demolition = "iwi_wall_demolition"
iwi_vapour_barrier = "iwi_vapour_barrier"
iwi_redecoration = "iwi_redecoration"
suspended_floor_demolition = "suspended_floor_demolition"
suspended_floor_redecoration = "suspended_floor_redecoration"
suspended_floor_vapour_barrier = "suspended_floor_vapour_barrier"
solid_floor_demolition = "solid_floor_demolition"
solid_floor_preparation = "solid_floor_preparation"
solid_floor_vapour_barrier = "solid_floor_vapour_barrier"
solid_floor_redecoration = "solid_floor_redecoration"
ewi_wall_demolition = "ewi_wall_demolition"
ewi_wall_preparation = "ewi_wall_preparation"
ewi_wall_redecoration = "ewi_wall_redecoration"
class DepthUnit(enum.Enum):
mm = "mm"
@ -27,6 +41,7 @@ class DepthUnit(enum.Enum):
class CostUnit(enum.Enum):
    """Units in which a material's cost can be expressed (used by the ``cost_unit`` column)."""

    # presumably GBP per square metre -- TODO confirm
    gbp_sq_meter = "gbp_sq_meter"
    # presumably GBP per individual unit/item -- TODO confirm
    gbp_per_unit = "gbp_per_unit"
    # NOTE(review): looks like this expresses the same unit as gbp_sq_meter -- confirm
    # whether both values are needed or one should be retired.
    gbp_per_m2 = "gbp_per_m2"
class RValueUnit(enum.Enum):
@ -41,9 +56,11 @@ class Material(Base):
__tablename__ = 'material'
id = Column(Integer, primary_key=True, autoincrement=True)
type = Column(Enum(MaterialType, values_callable=lambda x: [e.value for e in x]), nullable=False)
type = Column(Enum(MaterialType, values_callable=lambda x: [e.value for e in x], create_constraint=False),
nullable=False)
description = Column(String, nullable=False)
depths = Column(String) # You may want to use a specific JSON type depending on the database
depth = Column(String) # You may want to use a specific JSON type depending on the database
depth_unit = Column(Enum(DepthUnit, values_callable=lambda x: [e.value for e in x]), nullable=False)
cost = Column(String)
cost_unit = Column(Enum(CostUnit, values_callable=lambda x: [e.value for e in x]), nullable=False)
@ -57,3 +74,11 @@ class Material(Base):
link = Column(String)
created_at = Column(TIMESTAMP, nullable=False, server_default=func.now())
is_active = Column(Boolean, nullable=False, default=True)
prime_material_cost = Column(Float)
material_cost = Column(Float)
labour_cost = Column(Float)
labour_hours_per_unit = Column(Float)
plant_cost = Column(Float)
total_cost = Column(Float)
notes = Column(String)

35
etl/costs/README.md Normal file
View file

@ -0,0 +1,35 @@
### Costs ETL Application
This is a simple application to push the materials costs data to the database.
#### How to run
Ensure you have a .env file in the base Model directory with the following variables
```
DB_HOST="Your db host"
DB_PORT="Your db port"
DB_USERNAME="Your db user"
DB_PASSWORD="Your db password"
DB_NAME="Your db name"
```
Make sure your `PYTHONPATH` environment variable points to the base Model directory. To set the
`PYTHONPATH` environment variable, run the following command from the base Model directory
```
export PYTHONPATH=`pwd`
```
From the base Model directory, install the requirements by running the following command
```
pip install -r etl/costs/requirements.txt
```
Then run the following command to run the application
```
python etl/costs/app.py
```

98
etl/costs/app.py Normal file
View file

@ -0,0 +1,98 @@
import os
import dotenv
import json
import pandas as pd
import numpy as np
from pathlib import Path
from sqlalchemy.orm import Session
from sqlalchemy import create_engine
from backend.app.db.models.materials import Material
# Workbook containing the material cost sheets, kept in local_data next to this file
DATA_DIRECTORY = Path(__file__).parent / "local_data" / "Hestia Materials.xlsx"

# Directory that contains this file (etl/costs)
_APP_DIR = Path(__file__).resolve().parent

# Look for the environment file at the same level as this file first, then fall
# back to the base Model directory (two levels up). The previous value appended
# "etl/costs" to this file's own directory, i.e. etl/costs/etl/costs/.env, and
# load_dotenv silently loads nothing when the path does not exist.
ENV_FILE = _APP_DIR / ".env"
if not ENV_FILE.is_file():
    ENV_FILE = _APP_DIR.parent.parent / ".env"

dotenv.load_dotenv(ENV_FILE)

# Database connection settings, read from the environment after the .env file
# has been loaded. DB_USER is accepted as an alias for DB_USERNAME so that
# either spelling of the variable works.
DB_USERNAME = os.getenv('DB_USERNAME') or os.getenv('DB_USER')
DB_PASSWORD = os.getenv('DB_PASSWORD')
DB_HOST = os.getenv('DB_HOST')
DB_PORT = os.getenv('DB_PORT')
DB_NAME = os.getenv('DB_NAME')
def push_costs_to_db(engine, costs_df):
    """
    Insert every row of a costs DataFrame into the database as a Material record.

    :param engine: The SQLAlchemy engine connected to your database.
    :param costs_df: The DataFrame containing cost data. Each row's column/value
        pairs are passed to Material as keyword arguments.
    """
    # One Material per DataFrame row, built from that row's column -> value mapping
    records = [Material(**entry.to_dict()) for _, entry in costs_df.iterrows()]

    # Save all records through a single session and commit once
    with Session(engine) as db_session:
        db_session.bulk_save_objects(records)
        db_session.commit()
def app():
    """
    Upload the material cost data to our database.

    The most recent cost data can be found in OneDrive, in the
    shared folder > 04. Product Development > Cost data > Hestia Materials.xlsx

    For the moment, the data is uploaded manually. In the future, we will automate this so the data can be
    stored locally and then is uploaded from the local_data folder.

    The function reads one sheet per insulation measure from the workbook at
    DATA_DIRECTORY, stacks them into a single table and hands that table to
    push_costs_to_db.

    :return: None
    """
    # Imported here so the function brings its own dependency into scope
    from sqlalchemy.engine import URL

    # Build the URL from its components instead of formatting a string, so that
    # special characters in the credentials (e.g. "@", "/", ":") are escaped
    # and an unset port is omitted rather than rendered as the text "None".
    db_url = URL.create(
        drivername="postgresql+psycopg2",  # psycopg2 driver for PostgreSQL
        username=DB_USERNAME,
        password=DB_PASSWORD,
        host=DB_HOST,
        port=int(DB_PORT) if DB_PORT else None,
        database=DB_NAME,
    )
    db_engine = create_engine(db_url, pool_size=5, max_overflow=5)

    # Sheets to upload, in the order they are stacked into the final table
    sheet_names = [
        "cavity_wall_insulation",
        "loft_insulation",
        "internal_wall_insulation",
        "suspended_floor_insulation",
        "solid_floor_insulation",
        "external_wall_insulation",
    ]

    # A list of sheet names makes read_excel open the workbook once and return
    # a dict of DataFrames keyed by sheet name
    sheets = pd.read_excel(DATA_DIRECTORY, sheet_name=sheet_names, header=0)

    # Form a single table to be uploaded
    costs = pd.concat([sheets[name] for name in sheet_names])

    # Replace NaN with None before the rows are turned into Material objects
    costs = costs.replace({np.nan: None})

    # Missing depths become 0 and the whole column is converted to strings.
    # This runs after the NaN replacement on purpose: the order determines how
    # the filled value is rendered as text.
    costs["depth"] = costs["depth"].fillna(0)
    costs["depth"] = costs["depth"].astype(str)

    # Push the costs to the database
    push_costs_to_db(db_engine, costs)
# Run the upload only when this file is executed as a script
# (e.g. `python etl/costs/app.py`), not when it is imported.
if __name__ == "__main__":
    app()

View file

@ -0,0 +1,5 @@
pandas==1.5.3
sqlalchemy==2.0.19
python-dotenv
psycopg2-binary
openpyxl