mirror of
https://github.com/Hestia-Homes/survey-extraction.git
synced 2026-06-30 13:10:56 +00:00
save current work
This commit is contained in:
parent
d2e57de894
commit
4ff30fd8ef
4 changed files with 388 additions and 5 deletions
|
|
@ -187,6 +187,6 @@ epc_ns = df[
|
|||
epc_ns["job_type"] = "EPC NO SHOW"
|
||||
filtered_dfs.append(epc_ns)
|
||||
|
||||
final_df = pd.concat(filtered_dfs).drop_duplicates().reset_index(drop=True)
|
||||
final_df = pd.concat(filtered_dfs).reset_index(drop=True)
|
||||
|
||||
final_df[['address', 'client', 'job_type']]
|
||||
191
etl/month_end_automation_wave_2_no_3.py
Normal file
191
etl/month_end_automation_wave_2_no_3.py
Normal file
|
|
@ -0,0 +1,191 @@
|
|||
# Wave 2's month end automation
|
||||
|
||||
from tqdm import tqdm
|
||||
from monday import MondayClient
|
||||
from etl.osmosis_complaince_address_to_files import get_all_items, extract_asset_ids
|
||||
from pprint import pprint
|
||||
import pandas as pd
|
||||
import json
|
||||
|
||||
import os

# The Monday.com API token is read from the environment so that no credential
# is stored in version control.
# NOTE(review): a token was previously hard-coded on this line and is still in
# the repository history -- it should be revoked and reissued.
monday_key = os.environ.get("MONDAY_API_KEY")
if not monday_key:
    raise RuntimeError(
        "MONDAY_API_KEY is not set; export a Monday.com API token before running."
    )
monday = MondayClient(monday_key)

# Platform Housing W2 (in use)
board_ids = ["4796290860"]
|
||||
|
||||
|
||||
def extract_number_from_text(text):
    """Return the first run of consecutive decimal digits in *text* as an int.

    Scanning stops at the first non-digit character that follows a digit, so
    only the leading number is read. Returns ``None`` when *text* contains no
    digit at all.
    """
    digits = ""
    for char in text:
        # isdecimal() (unlike isnumeric()) only accepts characters that int()
        # can parse, so symbols such as "²" or "½" cannot raise ValueError.
        if char.isdecimal():
            digits += char
        elif digits:
            break  # stop once a number sequence ends
    return int(digits) if digits else None


# One dict per board item. Initialised once so that rows from every board in
# board_ids accumulate (identical to the previous behaviour for one board).
all_records = []

for board in tqdm(board_ids):
    board_data = monday.boards.fetch_boards_by_id(board)
    columns = board_data["data"]["boards"][0]["columns"]
    # Lower-cased column title -> column id, and the inverse used to label
    # each column value by its title.
    col_id_map = {col["title"].lower(): col["id"] for col in columns}
    reversed_col_id_map = {v: k for k, v in col_id_map.items()}

    items = get_all_items(board, monday)

    for row in tqdm(items):
        data = {"address": row["name"], "client": row["group"]["title"]}
        for col in row.get("column_values", []):
            title = reversed_col_id_map.get(col.get("id"))
            if title is None:
                continue  # column id is not part of this board's column map

            if col.get("type") == "file":
                # File columns are reduced to the number of attached files.
                raw = col.get("value")
                data[title] = len(json.loads(raw).get("files", [])) if raw else 0
            elif "no show" in title:
                # "No show" columns keep only the leading number of their text.
                text = col.get("text")
                data[title] = None if text is None else extract_number_from_text(text)
            else:
                data[title] = col.get("text")
        all_records.append(data)
|
||||
|
||||
# Convert to DataFrame
df = pd.DataFrame(all_records)

# One tagged frame per job type; concatenated into final_df at the end.
filtered_dfs = []

# RA
ra_statuses = ["completed rdsap 10", "completed rdsap 9.9", "completed", "complete"]
ra = df.loc[df["ra"].str.lower().isin(ra_statuses)].assign(job_type="RA")
filtered_dfs.append(ra)

# ATT
att = df.loc[df["att"].str.lower().isin(["completed"])].assign(job_type="ATT")
filtered_dfs.append(att)

# V1 Coordination
v1_done = df["coordination status"].str.lower().isin(["ima/mtp completed"])
v1 = df.loc[v1_done].assign(job_type="V1 Coordination")
filtered_dfs.append(v1)

# V2 Coordination
v2_statuses = ['v2 ima-mtp completed', 'v2 completed']
v2_done = df["v2 mtp status"].fillna('').str.lower().isin(v2_statuses)
v2 = df.loc[v2_done].assign(job_type="V2 Coordination")
filtered_dfs.append(v2)

# V3 Coordination -- disabled. Former rule:
#   column "v3 invoiced" in ["to be invoiced"] -> job_type "V3 Coordination"

# Design stage 1
design1_done = df["design invoice"].str.lower().isin(["complete pending rc"])
design1 = df.loc[design1_done].assign(job_type="Design")
filtered_dfs.append(design1)

# Design stage 2 -- disabled. Former rule:
#   column "revision 2 design invoice" in ["to invoice"] -> job_type "Design Revision"

# Lodgement Phase 1
lodg1_done = df["phase 1 invoice status (lodgement)"].str.lower().isin(["done"])
lodg1 = df.loc[lodg1_done].assign(job_type="Lodgement Phase 1")
filtered_dfs.append(lodg1)

# Full Lodgement Phase
lodg2_due = df["lodgement invoice status (lodgement)"].str.lower().isin(["to invoice"])
lodg2 = df.loc[lodg2_due].assign(job_type="Full Lodgement")
filtered_dfs.append(lodg2)

# POST EPC
post_epc_done = df["post epc"].str.lower().isin(["success", "pics uploaded"])
post_epc = df.loc[post_epc_done].assign(job_type="POST EPC")
filtered_dfs.append(post_epc)

# POST EPR -- disabled. Former rule:
#   column "post-epc status" in ["post epr completed"] -> job_type "POST ATT"

# Post ATT
post_att = df.loc[df["post att"].str.lower().isin(["uploaded"])].assign(job_type="POST ATT")
filtered_dfs.append(post_att)

# Retrofit Evaluation
retro_statuses = ["uploaded", "completed", "to invoice"]
retro = df.loc[df["retrofit evaluation"].str.lower().isin(retro_statuses)].assign(
    job_type="Retrofit Evaluation"
)
filtered_dfs.append(retro)

# No-show rules: a row is selected when its "evidence" value differs from its
# "invoice" value, with missing values on either side replaced by -9999.
# NOTE(review): if the evidence column is a file column, a row with 0 files and
# a blank invoice value compares 0 != -9999 and is selected -- confirm intended.

# RA NO Show
ra_ns_differs = df["ra no show evidence"].fillna(-9999).ne(
    df["ra no show invoice"].fillna(-9999)
)
ra_ns = df.loc[ra_ns_differs].assign(job_type="RA NO SHOW")
filtered_dfs.append(ra_ns)

# ATT NO Show
att_ns_differs = df["att no show evidence"].fillna(-9999).ne(
    df["att no show invoice"].fillna(-9999)
)
att_ns = df.loc[att_ns_differs].assign(job_type="ATT NO SHOW")
filtered_dfs.append(att_ns)

# Post visit no show
epc_ns_differs = df["post epc no show evidence"].fillna(-9999).ne(
    df["post epc no show invoice"].fillna(-9999)
)
epc_ns = df.loc[epc_ns_differs].assign(job_type="EPC NO SHOW")
filtered_dfs.append(epc_ns)

# Stack every job-type frame with a fresh 0..n-1 index (duplicates are kept).
final_df = pd.concat(filtered_dfs, ignore_index=True)

# Bare expression: displays the summary columns when run interactively.
final_df[['address', 'client', 'job_type']]
|
||||
192
etl/month_end_automation_wave_2_no_4.py
Normal file
192
etl/month_end_automation_wave_2_no_4.py
Normal file
|
|
@ -0,0 +1,192 @@
|
|||
# Wave 2's month end automation
|
||||
|
||||
from tqdm import tqdm
|
||||
from monday import MondayClient
|
||||
from etl.osmosis_complaince_address_to_files import get_all_items, extract_asset_ids
|
||||
from pprint import pprint
|
||||
import pandas as pd
|
||||
import json
|
||||
|
||||
import os

# The Monday.com API token is read from the environment so that no credential
# is stored in version control.
# NOTE(review): a token was previously hard-coded on this line and is still in
# the repository history -- it should be revoked and reissued.
monday_key = os.environ.get("MONDAY_API_KEY")
if not monday_key:
    raise RuntimeError(
        "MONDAY_API_KEY is not set; export a Monday.com API token before running."
    )
monday = MondayClient(monday_key)

# Stonewater (in use)
board_ids = ["3584401309"]
|
||||
|
||||
|
||||
def extract_number_from_text(text):
    """Return the first run of consecutive decimal digits in *text* as an int.

    Scanning stops at the first non-digit character that follows a digit, so
    only the leading number is read. Returns ``None`` when *text* contains no
    digit at all.
    """
    digits = ""
    for char in text:
        # isdecimal() (unlike isnumeric()) only accepts characters that int()
        # can parse, so symbols such as "²" or "½" cannot raise ValueError.
        if char.isdecimal():
            digits += char
        elif digits:
            break  # stop once a number sequence ends
    return int(digits) if digits else None


# One dict per board item. Initialised once so that rows from every board in
# board_ids accumulate (identical to the previous behaviour for one board).
all_records = []

for board in tqdm(board_ids):
    board_data = monday.boards.fetch_boards_by_id(board)
    columns = board_data["data"]["boards"][0]["columns"]
    # Lower-cased column title -> column id, and the inverse used to label
    # each column value by its title.
    col_id_map = {col["title"].lower(): col["id"] for col in columns}
    reversed_col_id_map = {v: k for k, v in col_id_map.items()}

    items = get_all_items(board, monday)

    for row in tqdm(items):
        data = {"address": row["name"], "client": row["group"]["title"]}
        for col in row.get("column_values", []):
            title = reversed_col_id_map.get(col.get("id"))
            if title is None:
                continue  # column id is not part of this board's column map

            if col.get("type") == "file":
                # File columns are reduced to the number of attached files.
                raw = col.get("value")
                data[title] = len(json.loads(raw).get("files", [])) if raw else 0
            elif "no show" in title:
                # "No show" columns keep only the leading number of their text.
                text = col.get("text")
                data[title] = None if text is None else extract_number_from_text(text)
            else:
                data[title] = col.get("text")
        all_records.append(data)
|
||||
|
||||
# Convert to DataFrame
df = pd.DataFrame(all_records)

# One tagged frame per job type; concatenated into final_df at the end.
filtered_dfs = []

# RA
ra_statuses = ["completed rdsap 10", "completed rdsap 9.9"]
ra = df.loc[df["ra"].str.lower().isin(ra_statuses)].assign(job_type="RA")
filtered_dfs.append(ra)

# ATT
att = df.loc[df["att"].str.lower().isin(["completed"])].assign(job_type="ATT")
filtered_dfs.append(att)

# V1 Coordination
v1_done = df["v1 coordination status (ioe,mtp)"].str.lower().isin(["rc complete"])
v1 = df.loc[v1_done].assign(job_type="V1 Coordination")
filtered_dfs.append(v1)
|
||||
|
||||
# V2 Coordination
v2 = df[df["mtp v2 invoiced"].str.lower().isin(['done', 'needs to be invoiced'])].copy()
v2["job_type"] = "V2 Coordination"
filtered_dfs.append(v2)

# V3 Coordination
v3 = df[df["v3 rc status"].str.lower().isin(['uploaded'])].copy()
v3["job_type"] = "V3 Coordination"
# Append v3 itself. The previous code appended the V2 frame a second time here,
# which duplicated every V2 row and left V3 rows out of the result.
filtered_dfs.append(v3)
|
||||
|
||||
# Design stage 1
design1_done = df["design invoice status"].str.lower().isin(["complete", "to invoice"])
design1 = df.loc[design1_done].assign(job_type="Design")
filtered_dfs.append(design1)

# Design stage 2 -- disabled. Former rule (column name was left empty):
#   column "" in ["to invoice"] -> job_type "Design Revision"

# Lodgement Phase 1
lodg1_done = df["phase 1 invoice status (lodgement)"].str.lower().isin(
    ["done", "to be invoiced"]
)
lodg1 = df.loc[lodg1_done].assign(job_type="Lodgement Phase 1")
filtered_dfs.append(lodg1)
|
||||
|
||||
# Full Lodgement Phase
# Rows whose lodgement status is "lodged" and whose lodgement invoice status
# has no value yet.
_ = df[
    df["lodgement status"].str.lower().isin(["lodged"])
].copy()
# isna() matches both None and NaN. The previous .str.lower().isin([None])
# depended on how pandas compares missing values inside isin().
# NOTE(review): an empty-string status is not treated as blank here -- confirm
# what the API returns for an unset status.
lodg2 = _[
    _["lodgement invoice status"].isna()
].copy()
lodg2["job_type"] = "Full Lodgement"
filtered_dfs.append(lodg2)
|
||||
|
||||
# POST EPC
post_epc_done = df["post epc"].str.lower().isin(["success", "pics uploaded"])
post_epc = df.loc[post_epc_done].assign(job_type="POST EPC")
filtered_dfs.append(post_epc)

# POST EPR -- disabled. Former rule:
#   column "post-epc status" in ["post epr completed"] -> job_type "POST ATT"

# Post ATT
post_att = df.loc[df["post att"].str.lower().isin(["uploaded"])].assign(job_type="POST ATT")
filtered_dfs.append(post_att)

# Retrofit Evaluation
retro_statuses = ["uploaded", "completed", "to invoice"]
retro = df.loc[df["retrofit evaluation"].str.lower().isin(retro_statuses)].assign(
    job_type="Retrofit Evaluation"
)
filtered_dfs.append(retro)

# No-show rules: a row is selected when its "evidence" value differs from its
# "invoice" value, with missing values on either side replaced by -9999.
# NOTE(review): if the evidence column is a file column, a row with 0 files and
# a blank invoice value compares 0 != -9999 and is selected -- confirm intended.

# RA NO Show
ra_ns_differs = df["ra no show evidence"].fillna(-9999).ne(
    df["ra no show invoice"].fillna(-9999)
)
ra_ns = df.loc[ra_ns_differs].assign(job_type="RA NO SHOW")
filtered_dfs.append(ra_ns)

# ATT NO Show
att_ns_differs = df["att no show evidence"].fillna(-9999).ne(
    df["att no show invoice"].fillna(-9999)
)
att_ns = df.loc[att_ns_differs].assign(job_type="ATT NO SHOW")
filtered_dfs.append(att_ns)

# Post visit no show
epc_ns_differs = df["post epc no show evidence"].fillna(-9999).ne(
    df["post epc no show invoice"].fillna(-9999)
)
epc_ns = df.loc[epc_ns_differs].assign(job_type="EPC NO SHOW")
filtered_dfs.append(epc_ns)

# Stack every job-type frame with a fresh 0..n-1 index (duplicates are kept).
final_df = pd.concat(filtered_dfs, ignore_index=True)

# Bare expression: displays the summary columns when run interactively.
final_df[['address', 'client', 'job_type']]
|
||||
|
|
@ -170,7 +170,7 @@ filtered_dfs.append(retro)
|
|||
|
||||
# RA NO Show
|
||||
ra_ns = df[
|
||||
df["ra no show invoice"].str.lower().isin(["to invoice","to invoice (+1 previous no show)", "to invoice (+2 previous no show)"])
|
||||
df["ra no show invoice"].str.lower().isin(["to invoice","to invoice (+1 previous no show)", "to invoice (+2 previous no shows)"])
|
||||
].copy()
|
||||
ra_ns["job_type"] = "RA NO SHOW"
|
||||
filtered_dfs.append(ra_ns)
|
||||
|
|
@ -178,7 +178,7 @@ filtered_dfs.append(ra_ns)
|
|||
|
||||
# ATT NO Show
|
||||
att_ns = df[
|
||||
df["pre att no show invoice"].str.lower().isin(["to invoice","to invoice (+1 previous no show)", "to invoice (+2 previous no show)"])
|
||||
df["pre att no show invoice"].str.lower().isin(["to invoice","to invoice (+1 previous no show)", "to invoice (+2 previous no shows)"])
|
||||
].copy()
|
||||
att_ns["job_type"] = "ATT NO SHOW"
|
||||
filtered_dfs.append(att_ns)
|
||||
|
|
@ -187,11 +187,11 @@ filtered_dfs.append(att_ns)
|
|||
# Post visit no show
|
||||
#post work no show invoice
|
||||
epc_ns = df[
|
||||
df["post works no show invoice"].str.lower().isin(["to invoice","to invoice (+1 previous no show)", "to invoice (+2 previous no show)"])
|
||||
df["post works no show invoice"].str.lower().isin(["to invoice","to invoice (+1 previous no show)", "to invoice (+2 previous no shows)"])
|
||||
].copy()
|
||||
epc_ns["job_type"] = "EPC NO SHOW"
|
||||
filtered_dfs.append(epc_ns)
|
||||
|
||||
final_df = pd.concat(filtered_dfs).drop_duplicates().reset_index(drop=True)
|
||||
final_df = pd.concat(filtered_dfs).reset_index(drop=True)
|
||||
|
||||
final_df[['address', 'client', 'job_type']]
|
||||
Loading…
Add table
Reference in a new issue