# survey-extraction/etl/month_end_automation_wave_3_layout.py

# Wave 3's month end automation

from tqdm import tqdm
from monday import MondayClient
from etl.osmosis_complaince_address_to_files import get_all_items, extract_asset_ids
from pprint import pprint
import pandas as pd
import json
import os
from etl.MonthEndUploader import upload_to_month_end_folder
from enum import Enum


# SECURITY: a monday.com API token is committed here in plain text. It should be
# rotated and removed from source control. Setting the MONDAY_API_KEY environment
# variable overrides it, so deployments can already stop relying on the literal.
monday_key = os.environ.get(
    "MONDAY_API_KEY",
    "eyJhbGciOiJIUzI1NiJ9.eyJ0aWQiOjQ5ODc2ODQxOCwiYWFpIjoxMSwidWlkIjozNjE3ODAzNCwiaWFkIjoiMjAyNS0wNC0xMVQxMToyMzoxNy40NjdaIiwicGVyIjoibWU6d3JpdGUiLCJhY3RpZCI6MTM5OTc4MjMsInJnbiI6InVzZTEifQ.-2Lit4s46ZF6AXuMW9t0TxIaFLkHqD4Yo-PyM9i2XZY",
)
# Shared client used for every board fetch below.
monday = MondayClient(monday_key)

class BoardID(Enum):
    """Folder name for each monday.com board, keyed by ``_<board id>``.

    Member names are the board id prefixed with an underscore (identifiers
    cannot start with a digit); look one up with ``BoardID[f"_{board}"]``.
    The value is passed as the master folder name when uploading the
    month-end files.
    """

    _9349630181 = "WCHG_WALKUPS_OPERATIONS"
    _8830772914 = "LQ_LONDON"
    _9601691730 = "CARDO_WALES_WEST_WAVE3"
    _9660895490 = "NORTHUMBERLAND_COUNTY_SHDF_WAVE3"
    _9641491000 = "WATFORD_WARM_HOMES"
    _9671463094 = "SEDDON"
    _9929454382 = "NCHA SHDF Wave 3 Main Operation Board"
    _18232420839 = "Powys County Council"
    _18380751311 = "Devon County Council"
    _18397425376 = "CRG"


# monday.com board ids processed by this run, in processing order. Every id
# listed here needs a matching BoardID member and rate_cards entry.
board_ids = [
    "9349630181",  # WCHG Walkups-Operations
    "8830772914",  # L&Q London
    "9601691730",  # Cardo Wales & West - Wave 3
    "9660895490",  # Northumberland County SHDF Wave 3
    "9641491000",  # Watford Warm Homes
    "9671463094",  # Seddon
    "9929454382",  # NCHA SHDF Wave 3 Main Operation Board
    "18232420839",  # Powys County Council
    "18380751311",  # Devon County Council
    "18397425376",  # CRG (was an empty string, which cannot be fetched or looked up)
]

# Placeholder stored in the "rate" column when a board has no price for a job type.
empty = "Rate card info missing"

# Board id (string) -> rate-card DataFrame with "job_type" and "rate" columns.
rate_cards = {}

# Placeholder for prices that still need manual confirmation.
check_with_andres = "Check with Andres!!!"

# Every job type a rate card can list, in the row order used by the exported sheet.
_JOB_TYPES_FULL = [
    "RA", "ATT",
    "Coordination Stage 1 v1", "Coordination Stage 1 v2 remodel", "Coordination Stage 1 v3 remodel",
    "Design Archetype Complex", "Design Archetype Simple",
    "Design Repetitive Simple", "Design Repetitive Complex",
    "Coordination Stage 2", "Lodgement", "Full lodgement phase 2",
    "Post EPR", "Post EPC", "Post ATT", "retrofit evaluation",
    "RA no show", "ATT no show", "post EPC no show",
    "Full cost MTP", "measure modelling", "design type not specified",
]

# Some boards' cards do not list "Design Repetitive Complex" at all.
_JOB_TYPES_NO_REPETITIVE_COMPLEX = [
    job_type for job_type in _JOB_TYPES_FULL if job_type != "Design Repetitive Complex"
]


def _rate_card_data(job_types, rates):
    """Build the column-oriented rate-card dict for one board.

    Args:
        job_types: Ordered job-type names to include in the card.
        rates: Mapping of job type -> rate. Job types absent from the mapping
            get the ``empty`` placeholder.

    Returns:
        ``{"job_type": [...], "rate": [...]}`` with both lists the same length,
        ready to be passed to ``pd.DataFrame``.

    Raises:
        ValueError: If ``rates`` names a job type that is not in ``job_types``
            (guards against typos silently dropping a price).
    """
    unknown = sorted(set(rates) - set(job_types))
    if unknown:
        raise ValueError(f"Unknown job type(s) in rate card: {unknown}")
    return {
        "job_type": list(job_types),
        "rate": [rates.get(job_type, empty) for job_type in job_types],
    }


# Powys County Council
rate_card_data_powys = _rate_card_data(_JOB_TYPES_FULL, {
    "RA": 259,
    "ATT": 125,
    "Coordination Stage 1 v1": 280,
    "Coordination Stage 1 v2 remodel": 125,
    "Coordination Stage 1 v3 remodel": 125,
    "Design Archetype Complex": 650,
    "Design Archetype Simple": check_with_andres,
    "Design Repetitive Simple": check_with_andres,
    "Design Repetitive Complex": 225,
    "Coordination Stage 2": 175,
    "Lodgement": 95,
    "Full lodgement phase 2": 60,
    "Post ATT": 95,
    "retrofit evaluation": 60,
    "RA no show": 40,
    "ATT no show": 40,
    "post EPC no show": 40,
    "design type not specified": check_with_andres,
})
rate_cards["18232420839"] = pd.DataFrame(rate_card_data_powys)

# Devon County Council
rate_card_data_devon = _rate_card_data(_JOB_TYPES_FULL, {
    "design type not specified": check_with_andres,
})
rate_cards["18380751311"] = pd.DataFrame(rate_card_data_devon)

# Watford Warm Homes
rate_card_data_watford_warm_homes = _rate_card_data(_JOB_TYPES_NO_REPETITIVE_COMPLEX, {
    "RA": 165,
    "design type not specified": check_with_andres,
})
rate_cards["9641491000"] = pd.DataFrame(rate_card_data_watford_warm_homes)

# NCHA SHDF Wave 3
rate_card_data_ncha_wave_3 = _rate_card_data(_JOB_TYPES_NO_REPETITIVE_COMPLEX, {
    "RA": 259,
    "ATT": 125,
    "Coordination Stage 1 v1": 280,
    "Coordination Stage 1 v2 remodel": 125,
    "Coordination Stage 1 v3 remodel": 125,
    "Design Archetype Complex": 650,
    "Design Archetype Simple": 415,
    "Design Repetitive Simple": 195,
    "Coordination Stage 2": 175,
    "Lodgement": 135,
    "Full lodgement phase 2": 120,
    "Post EPR": "60 - check if EPC or EPR",
    "Post EPC": "150 - check if EPC or EPR",
    "Post ATT": 125,
    "retrofit evaluation": 60,
    "RA no show": 45,
    "ATT no show": 45,
    "post EPC no show": 45,
    "design type not specified": check_with_andres,
})
rate_cards["9929454382"] = pd.DataFrame(rate_card_data_ncha_wave_3)

# L&Q London
rate_card_data_l_and_q_london = _rate_card_data(_JOB_TYPES_NO_REPETITIVE_COMPLEX, {
    "Full cost MTP": 280,
    "measure modelling": 150,
    "design type not specified": check_with_andres,
})
rate_cards["8830772914"] = pd.DataFrame(rate_card_data_l_and_q_london)

# Northumberland County SHDF Wave 3
rate_card_data_northhumberland_country_shdf_wave_3 = _rate_card_data(_JOB_TYPES_FULL, {
    "RA": 259,
    "ATT": 125,
    "Coordination Stage 1 v1": "280 or 310 depending on SH or LA",
    "Coordination Stage 1 v2 remodel": 125,
    "Coordination Stage 1 v3 remodel": 125,
    "Design Archetype Complex": 650,
    "Design Archetype Simple": 415,
    "Design Repetitive Simple": 195,
    "Design Repetitive Complex": 225,
    "Coordination Stage 2": 175,
    "Lodgement": 135,
    "Full lodgement phase 2": 120,
    "Post EPR": "60 - check",
    "Post EPC": "85 or 90 depending on SH or LA",
    "Post ATT": "110 or 125 depending on SH or LA",
    "retrofit evaluation": 60,
    "RA no show": 45,
    "ATT no show": 45,
    "post EPC no show": 45,
    "design type not specified": check_with_andres,
})
rate_cards["9660895490"] = pd.DataFrame(rate_card_data_northhumberland_country_shdf_wave_3)

# WCHG Walkups-Operations
rate_card_data_walk_ups = _rate_card_data(_JOB_TYPES_NO_REPETITIVE_COMPLEX, {
    "RA": 259,
    "ATT": 125,
    "Coordination Stage 1 v1": 280,
    "Coordination Stage 1 v2 remodel": 125,
    "Coordination Stage 1 v3 remodel": 125,
    "Design Archetype Complex": 650,
    "Design Archetype Simple": 415,
    "Design Repetitive Simple": 195,
    "Coordination Stage 2": 175,
    "Lodgement": 135,
    "Full lodgement phase 2": 120,
    "Post EPR": "60 to check",
    "Post EPC": 85,
    "Post ATT": 125,
    "retrofit evaluation": 60,
    "RA no show": 45,
    "ATT no show": 45,
    "post EPC no show": 45,
    "design type not specified": check_with_andres,
})
rate_cards["9349630181"] = pd.DataFrame(rate_card_data_walk_ups)

# Blank template card; defined but not registered against any board.
rate_card_data_empty = _rate_card_data(_JOB_TYPES_NO_REPETITIVE_COMPLEX, {
    "design type not specified": check_with_andres,
})

# Cardo Wales & West - Wave 3
rate_card_data_cardo = _rate_card_data(_JOB_TYPES_FULL, {
    "RA": 259,
    "ATT": 125,
    "Coordination Stage 1 v1": 260,
    "Coordination Stage 1 v2 remodel": 125,
    "Coordination Stage 1 v3 remodel": 125,
    "Design Archetype Complex": 415,
    "Design Archetype Simple": 415,
    "Design Repetitive Simple": 225,
    "Design Repetitive Complex": 225,
    "Coordination Stage 2": 175,
    "Lodgement": 135,
    "Full lodgement phase 2": 120,
    "Post EPR": "60 - check",
    "Post EPC": 85,
    "Post ATT": 125,
    "retrofit evaluation": 60,
    "RA no show": 45,
    "ATT no show": 45,
    "post EPC no show": 45,
    "design type not specified": check_with_andres,
})
rate_cards["9601691730"] = pd.DataFrame(rate_card_data_cardo)

# Seddon
rate_card_data_seddon = _rate_card_data(_JOB_TYPES_FULL, {
    "RA": 259,
    "ATT": 125,
    "Coordination Stage 1 v1": 280,
    "Coordination Stage 1 v2 remodel": 125,
    "Coordination Stage 1 v3 remodel": 125,
    "Design Archetype Complex": 650,
    "Design Archetype Simple": 415,
    "Design Repetitive Simple": 195,
    "Design Repetitive Complex": 225,
    "Coordination Stage 2": 175,
    "Lodgement": 135,
    "Full lodgement phase 2": 120,
    "Post EPR": "60 - check",
    "Post EPC": 85,
    "Post ATT": 125,
    "retrofit evaluation": 60,
    "RA no show": 45,
    "ATT no show": 45,
    "post EPC no show": 45,
    "design type not specified": check_with_andres,
})
rate_cards["9671463094"] = pd.DataFrame(rate_card_data_seddon)

# CRG: no prices yet, so every row carries the ``empty`` placeholder. This card
# previously reused the name ``rate_card_data_seddon`` and clobbered the Seddon data.
rate_card_data_crg = _rate_card_data(_JOB_TYPES_FULL, {})
rate_cards["18397425376"] = pd.DataFrame(rate_card_data_crg)


# Board id -> list of flat per-item dicts (one dict per kept board item).
board_to_record = {}
for board in tqdm(board_ids):
    print(f"working on board {board}")

    # Column metadata: lower-cased title -> column id, plus the inverse lookup.
    board_data = monday.boards.fetch_boards_by_id(board)
    columns = board_data["data"]["boards"][0]["columns"]
    col_id_map = {col["title"].lower(): col["id"] for col in columns}
    reversed_col_id_map = {v: k for k, v in col_id_map.items()}

    items = get_all_items(board, monday)

    all_records = []
    for row in tqdm(items):
        client = row['group']['title']
        # Items sitting in a "removed" group are left out of the export.
        if client in ("Removed From Program", "Removed From Project"):
            continue

        data = {"address": row['name'], "client": client}
        for cell in row.get("column_values", []):
            cell_id = cell.get("id")
            if cell_id not in reversed_col_id_map:
                continue
            title = reversed_col_id_map[cell_id]
            if cell.get("type") == "file":
                # File columns are reduced to the number of attached files.
                raw = cell.get("value")
                data[title] = len(json.loads(raw).get('files', [])) if raw else 0
            else:
                data[title] = cell.get("text")
        all_records.append(data)

    board_to_record[board] = all_records

import datetime

# Boards for which the lodgement and retrofit-evaluation slices are skipped.
ignore = [
    "9349630181",  # WCHG Walkups-Operations
    "18232420839",  # Powys County Council
    "8830772914",  # L&Q London
    "9641491000",  # Watford Warm Homes
    "9601691730",  # Cardo Wales & West - Wave 3
    "9671463094",  # Seddon
]

# Status values that mark a no-show visit as ready to invoice.
_NO_SHOW_STATUSES = [
    "to invoice",
    "to invoice (+1 previous no show)",
    "to invoice (+2 previous no shows)",
]

# (status column, accepted statuses, job type) slices emitted before the design slices.
# NOTE(review): "ATT" here and "POST ATT" further down both filter on
# "post att invoicing status", so the same rows are emitted under both job types.
# Confirm which column should drive the plain ATT slice.
_PRE_DESIGN_SLICES = [
    ("ra invoicing status", ["to invoice"], "RA"),
    ("post att invoicing status", ["to invoice"], "ATT"),
    ("mtp invoicing status", ["modelling to invoice"], "Measure Modelling"),
    # Only needed for one board in wave 3.
    ("mtp invoicing status", ["(v1) full cost mtp to invoice (no previous modelling)"], "full cost mtp"),
    ("mtp invoicing status", ["(v1) ioe/mtp to invoice"], "Coordination Stage 1 v1"),
    ("mtp invoicing status", ["(v2) ioe/mtp to invoice"], "Coordination Stage 1 v2 remodel"),
    ("mtp invoicing status", ["(v3) ioe/mtp to invoice"], "Coordination Stage 1 v3 remodel"),
]

# ("design invoice type" value, job type) pairs for designs that are ready to invoice.
_DESIGN_TYPES = [
    ("archetype (complex)", "Design Archetype Complex"),
    ("archetype (simple)", "Design Archetype Simple"),
    ("repetitive (simple)", "Design Repetitive Simple"),
    ("repetitive (complex)", "Design Repetitive Complex"),
]

# Slices emitted after the design, lodgement and evaluation slices.
_POST_WORKS_SLICES = [
    ("post epc invoicing status", ["epc to invoice"], "POST EPC"),
    ("post epr invoicing status", ["epr to invoice"], "POST EPR"),
    ("post att invoicing status", ["to invoice"], "POST ATT"),
    ("rc stage 2 invoicing status", ["to invoice"], "Coordination Stage 2"),
    ("ra no show invoice", _NO_SHOW_STATUSES, "RA NO SHOW"),
    ("pre att no show invoice", _NO_SHOW_STATUSES, "ATT NO SHOW"),
    ("post works no show invoice", _NO_SHOW_STATUSES, "post EPC NO SHOW"),
]


def get_df(df, column_name, success_critera, job_name=None):
    """Return a copy of the rows of ``df`` whose status column matches.

    Args:
        df: Frame to filter; its columns are lower-cased board column titles.
        column_name: Title of the status column. Compared case-insensitively.
        success_critera: Status texts to keep. Compared case-insensitively.
        job_name: When given, written to a ``job_type`` column on the result.

    Returns:
        The matching rows, or an empty ``DataFrame`` with no columns when
        ``df`` does not have ``column_name``.
    """
    column_name = column_name.lower()
    # Check the frame being filtered, not the column map of whichever board
    # happened to be fetched last.
    if column_name not in df.columns:
        return pd.DataFrame()

    wanted = [status.lower() for status in success_critera]
    matched = df[df[column_name].str.lower().isin(wanted)].copy()
    if job_name:
        matched["job_type"] = job_name
    return matched


def _keep_non_empty(frames, frame):
    """Append ``frame`` to ``frames`` unless it has no rows."""
    if not frame.empty:
        frames.append(frame)


# Convert each board's records to a DataFrame, slice out everything that is
# ready to invoice, price it from the board's rate card and upload the result.
for board, all_records in board_to_record.items():
    df = pd.DataFrame(all_records)
    filtered_dfs = []

    for column_name, statuses, job_name in _PRE_DESIGN_SLICES:
        _keep_non_empty(filtered_dfs, get_df(df, column_name, statuses, job_name))

    # Designs ready to invoice, split by invoice type.
    design = get_df(df, "design invoicing status", ["to invoice"])
    typed_parts = []
    for invoice_type, job_name in _DESIGN_TYPES:
        part = get_df(design, "design invoice type", [invoice_type], job_name)
        if not part.empty:
            typed_parts.append(part)
            filtered_dfs.append(part)

    # Anything not matched by the four categories. The original index is kept
    # on the slices so it can be subtracted cleanly.
    if typed_parts:
        matched_idx = pd.concat(typed_parts, ignore_index=False).index
        remaining_idx = design.index.difference(matched_idx)
    else:
        remaining_idx = design.index
    design_remaining = design.loc[remaining_idx].copy()
    if not design_remaining.empty:
        design_remaining["job_type"] = "Design Type Not Specified"
        filtered_dfs.append(design_remaining)

    # Design revisions a-d all share one job type.
    for letter in ("a", "b", "c", "d"):
        revision = get_df(df, "design revision invoice", [f"rev. {letter} to invoice"], "Design Revision")
        _keep_non_empty(filtered_dfs, revision)

    if board not in ignore:
        lodgement = get_df(df, "lodgement invoicing status", ["to invoice"], "Lodgement")
        _keep_non_empty(filtered_dfs, lodgement)

        retro_eval = get_df(df, "evaluation invoicing status", ["to invoice"], "retrofit evaluation")
        _keep_non_empty(filtered_dfs, retro_eval)

    for column_name, statuses, job_name in _POST_WORKS_SLICES:
        _keep_non_empty(filtered_dfs, get_df(df, column_name, statuses, job_name))

    if not filtered_dfs:
        # Nothing to invoice on this board.
        continue

    final_df = pd.concat(filtered_dfs).reset_index(drop=True)

    # Job types are matched case-insensitively against the rate card.
    final_df["job_type"] = final_df["job_type"].str.lower()
    rate_card_df = rate_cards[board]
    rate_card_df["job_type"] = rate_card_df["job_type"].str.lower()

    # Left merge: job types without a rate-card row keep a blank rate.
    combined_with_rates = final_df.merge(rate_card_df, on="job_type", how="left")
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M')

    # Upload to sharepoint
    attribute = ['address', 'client', 'job_type', 'rate']
    master_folder_name = BoardID[f"_{board}"].value
    file_name = f"{master_folder_name}_{timestamp}.xlsx"
    combined_with_rates[attribute].to_excel(file_name, index=False)
    upload_to_month_end_folder(file_name, os.path.abspath(file_name), master_folder_name)

    # The rate card used for pricing is uploaded alongside the export.
    invoice_name = "rate_card.xlsx"
    rate_card_df.to_excel(invoice_name, index=False)
    upload_to_month_end_folder(invoice_name, os.path.abspath(invoice_name), master_folder_name)