mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Created costs elt app
This commit is contained in:
parent
ee698c40b5
commit
b8ae345076
6 changed files with 167 additions and 4 deletions
2
.idea/Model.iml
generated
2
.idea/Model.iml
generated
|
|
@ -7,7 +7,7 @@
|
|||
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
|
||||
</content>
|
||||
<orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
|
||||
<orderEntry type="jdk" jdkName="Costs" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
<component name="PyNamespacePackagesService">
|
||||
|
|
|
|||
2
.idea/misc.xml
generated
2
.idea/misc.xml
generated
|
|
@ -3,7 +3,7 @@
|
|||
<component name="Black">
|
||||
<option name="sdkName" value="Python 3.10 (backend)" />
|
||||
</component>
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Costs" project-jdk-type="Python SDK" />
|
||||
<component name="PythonCompatibilityInspectionAdvertiser">
|
||||
<option name="version" value="3" />
|
||||
</component>
|
||||
|
|
|
|||
|
|
@ -19,6 +19,20 @@ class MaterialType(enum.Enum):
|
|||
flat_roof_insulation = "flat_roof_insulation"
|
||||
room_roof_insulation = "room_roof_insulation"
|
||||
|
||||
iwi_wall_demolition = "iwi_wall_demolition"
|
||||
iwi_vapour_barrier = "iwi_vapour_barrier"
|
||||
iwi_redecoration = "iwi_redecoration"
|
||||
suspended_floor_demolition = "suspended_floor_demolition"
|
||||
suspended_floor_redecoration = "suspended_floor_redecoration"
|
||||
suspended_floor_vapour_barrier = "suspended_floor_vapour_barrier"
|
||||
solid_floor_demolition = "solid_floor_demolition"
|
||||
solid_floor_preparation = "solid_floor_preparation"
|
||||
solid_floor_vapour_barrier = "solid_floor_vapour_barrier"
|
||||
solid_floor_redecoration = "solid_floor_redecoration"
|
||||
ewi_wall_demolition = "ewi_wall_demolition"
|
||||
ewi_wall_preparation = "ewi_wall_preparation"
|
||||
ewi_wall_redecoration = "ewi_wall_redecoration"
|
||||
|
||||
|
||||
class DepthUnit(enum.Enum):
|
||||
mm = "mm"
|
||||
|
|
@ -27,6 +41,7 @@ class DepthUnit(enum.Enum):
|
|||
class CostUnit(enum.Enum):
|
||||
gbp_sq_meter = "gbp_sq_meter"
|
||||
gbp_per_unit = "gbp_per_unit"
|
||||
gbp_per_m2 = "gbp_per_m2"
|
||||
|
||||
|
||||
class RValueUnit(enum.Enum):
|
||||
|
|
@ -41,9 +56,11 @@ class Material(Base):
|
|||
__tablename__ = 'material'
|
||||
|
||||
id = Column(Integer, primary_key=True, autoincrement=True)
|
||||
type = Column(Enum(MaterialType, values_callable=lambda x: [e.value for e in x]), nullable=False)
|
||||
type = Column(Enum(MaterialType, values_callable=lambda x: [e.value for e in x], create_constraint=False),
|
||||
nullable=False)
|
||||
|
||||
description = Column(String, nullable=False)
|
||||
depths = Column(String) # You may want to use a specific JSON type depending on the database
|
||||
depth = Column(String) # You may want to use a specific JSON type depending on the database
|
||||
depth_unit = Column(Enum(DepthUnit, values_callable=lambda x: [e.value for e in x]), nullable=False)
|
||||
cost = Column(String)
|
||||
cost_unit = Column(Enum(CostUnit, values_callable=lambda x: [e.value for e in x]), nullable=False)
|
||||
|
|
@ -57,3 +74,11 @@ class Material(Base):
|
|||
link = Column(String)
|
||||
created_at = Column(TIMESTAMP, nullable=False, server_default=func.now())
|
||||
is_active = Column(Boolean, nullable=False, default=True)
|
||||
|
||||
prime_material_cost = Column(Float)
|
||||
material_cost = Column(Float)
|
||||
labour_cost = Column(Float)
|
||||
labour_hours_per_unit = Column(Float)
|
||||
plant_cost = Column(Float)
|
||||
total_cost = Column(Float)
|
||||
notes = Column(String)
|
||||
|
|
|
|||
35
etl/costs/README.md
Normal file
35
etl/costs/README.md
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
### Costs ETL Application
|
||||
|
||||
This is a simple application to push the materials costs data to the database.
|
||||
|
||||
#### How to run
|
||||
|
||||
Ensure you have a .env file in the base Model directory with the following variables
|
||||
|
||||
```
|
||||
DB_HOST="Your db host"
|
||||
DB_PORT="Your db port"
|
||||
DB_USERNAME="Your db user"
|
||||
DB_PASSWORD="Your db password"
|
||||
DB_NAME="Your db name"
|
||||
```
|
||||
|
||||
Make sure your Python path environment variable points to the base Model directory. To set the
|
||||
`PYTHONPATH` environment variable, run the following command from the base Model directory
|
||||
|
||||
```
|
||||
export PYTHONPATH=`pwd`
|
||||
```
|
||||
|
||||
From the base Model directory, install the requirements by running the following command
|
||||
|
||||
```
|
||||
pip install -r etl/costs/requirements.txt
|
||||
```
|
||||
|
||||
Then run the following command to run the application
|
||||
|
||||
```
|
||||
python etl/costs/app.py
|
||||
```
|
||||
|
||||
98
etl/costs/app.py
Normal file
98
etl/costs/app.py
Normal file
|
|
@ -0,0 +1,98 @@
|
|||
import os
|
||||
import dotenv
|
||||
import json
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from pathlib import Path
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy import create_engine
|
||||
from backend.app.db.models.materials import Material
|
||||
|
||||
# Path to the Excel workbook holding the cost sheets (kept under local_data/ next to this file).
DATA_DIRECTORY = Path(__file__).parent / "local_data" / "Hestia Materials.xlsx"

# Locate the environment file. A .env placed next to this file is preferred;
# otherwise fall back to the base Model directory (two levels above etl/costs/).
# The previous path appended "etl/costs" to this file's own directory and so
# pointed at etl/costs/etl/costs/.env, which load_dotenv silently skips when
# it does not exist.
_APP_DIRECTORY = Path(__file__).resolve().parent
_ENV_CANDIDATES = (
    _APP_DIRECTORY / ".env",
    _APP_DIRECTORY.parent.parent / ".env",
)
ENV_FILE = next((path for path in _ENV_CANDIDATES if path.is_file()), _ENV_CANDIDATES[-1])
dotenv.load_dotenv(ENV_FILE)

# Accept DB_USER as a fallback so .env files using either name work.
DB_USERNAME = os.getenv('DB_USERNAME') or os.getenv('DB_USER')
DB_PASSWORD = os.getenv('DB_PASSWORD')
DB_HOST = os.getenv('DB_HOST')
DB_PORT = os.getenv('DB_PORT')
DB_NAME = os.getenv('DB_NAME')
|
||||
|
||||
|
||||
def push_costs_to_db(engine, costs_df):
    """
    Insert every row of the costs DataFrame into the database as a Material.

    Each row is converted to a dict and passed to ``Material`` as keyword
    arguments, so the DataFrame columns must already be named after the
    model's attributes.

    :param engine: The SQLAlchemy engine connected to your database.
    :param costs_df: The DataFrame containing cost data.
    """
    # One Material per DataFrame row; any further per-row transformations belong here.
    materials = [Material(**row.to_dict()) for _, row in costs_df.iterrows()]

    # Save all objects in one bulk operation and commit once at the end.
    with Session(engine) as db_session:
        db_session.bulk_save_objects(materials)
        db_session.commit()
|
||||
|
||||
|
||||
def app():
    """
    Upload the cost data to our database.

    The most recent cost data can be found in OneDrive, in the
    shared folder > 04. Product Development > Cost data > Hestia Materials.xlsx

    For the moment, the data is uploaded manually. In the future, we will automate this so the data can be
    stored locally and then is uploaded from the local_data folder.

    The workbook at ``DATA_DIRECTORY`` is read one sheet per insulation type; the sheets are
    stacked into a single table and handed to ``push_costs_to_db``.

    :raises RuntimeError: if any of the database settings is missing from the environment.
    :return: None
    """
    # Imported locally so this function does not depend on changes to the module's import block.
    from sqlalchemy.engine import URL

    # Fail early with a clear message instead of building a URL that contains the text "None".
    settings = {
        "DB_USERNAME": DB_USERNAME,
        "DB_PASSWORD": DB_PASSWORD,
        "DB_HOST": DB_HOST,
        "DB_PORT": DB_PORT,
        "DB_NAME": DB_NAME,
    }
    missing = [name for name, value in settings.items() if not value]
    if missing:
        raise RuntimeError(
            "Missing database settings: " + ", ".join(missing) + ". Check your .env file."
        )

    # URL.create escapes special characters (e.g. "@" or "/" in the password),
    # which plain string formatting does not.
    db_url = URL.create(
        drivername="postgresql+psycopg2",  # psycopg2 driver for PostgreSQL
        username=DB_USERNAME,
        password=DB_PASSWORD,
        host=DB_HOST,
        port=int(DB_PORT),
        database=DB_NAME,
    )
    db_engine = create_engine(db_url, pool_size=5, max_overflow=5)

    # Sheet order is preserved in the combined table.
    sheet_names = [
        "cavity_wall_insulation",
        "loft_insulation",
        "internal_wall_insulation",
        "suspended_floor_insulation",
        "solid_floor_insulation",
        "external_wall_insulation",
    ]
    # Passing a list of sheet names opens the workbook once and returns {sheet_name: DataFrame}.
    sheets = pd.read_excel(DATA_DIRECTORY, sheet_name=sheet_names, header=0)

    # Form a single table to be uploaded
    costs = pd.concat([sheets[name] for name in sheet_names], ignore_index=True)

    # Missing cells become None; a missing depth is stored as the string "0".
    costs = costs.replace({np.nan: None})
    costs["depth"] = costs["depth"].fillna(0)
    costs["depth"] = costs["depth"].astype(str)

    # Push the costs to the database, releasing pooled connections afterwards.
    try:
        push_costs_to_db(db_engine, costs)
    finally:
        db_engine.dispose()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
app()
|
||||
5
etl/costs/requirements.txt
Normal file
5
etl/costs/requirements.txt
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
pandas==1.5.3
|
||||
sqlalchemy==2.0.19
|
||||
python-dotenv
|
||||
psycopg2-binary
|
||||
openpyxl
|
||||
Loading…
Add table
Reference in a new issue