From f74d2671fddefd1ee1805fc9715ae6c3c642f5b3 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 22 Sep 2025 10:30:13 +0000 Subject: [PATCH] Redo month-end design filtering --- etl/month_end_automation_wave_3_layout.py | 42 +++++++++++-------- ...onth_end_automation_wave_accent_housing.py | 31 +++++++++----- 2 files changed, 44 insertions(+), 29 deletions(-) diff --git a/etl/month_end_automation_wave_3_layout.py b/etl/month_end_automation_wave_3_layout.py index 74038ad..bfa2004 100644 --- a/etl/month_end_automation_wave_3_layout.py +++ b/etl/month_end_automation_wave_3_layout.py @@ -244,36 +244,42 @@ for board, all_records in board_to_record.items(): if not v3.empty: filtered_dfs.append(v3) - # Design archetype complex + # Base filter design = get_df(df, "design invoicing status", ["to invoice"]) + + # Build each slice design1 = get_df(design, "design invoice type", ["archetype (complex)"], "Design Archetype Complex") - if not design1.empty : + if not design1.empty: filtered_dfs.append(design1) - # Design archetype simple design2 = get_df(design, "design invoice type", ["archetype (simple)"], "Design Archetype Simple") - if not design1.empty: + if not design2.empty: filtered_dfs.append(design2) - # Design repetitive simple - design3 = get_df(design, "design invoice type", ["repetitive (simple)"], "Design repetitive simple") - if not design1.empty: + design3 = get_df(design, "design invoice type", ["repetitive (simple)"], "Design Repetitive Simple") + if not design3.empty: filtered_dfs.append(design3) - # Design repetitive complex - design4 = get_df(design, "design invoice type", ["repetitive (complex)"], "Design Repetitive complex") - if not design1.empty: + design4 = get_df(design, "design invoice type", ["repetitive (complex)"], "Design Repetitive Complex") + if not design4.empty: filtered_dfs.append(design4) - - # Design not specified - all_filtered = pd.concat([df for df in (design1, design2, design3, design4) if not df.empty]) - design_remaining = 
design.loc[~design.index.isin(all_filtered.index)] + + # Safe concat of non-empty slices + parts = [d for d in (design1, design2, design3, design4) if not d.empty] + if parts: + all_filtered = pd.concat(parts, ignore_index=False) # keep original index so we can subtract cleanly + # Anything not matched by the 4 categories + remaining_idx = design.index.difference(all_filtered.index) + else: + # No matches in any category + all_filtered = design.iloc[0:0] # empty DF with same columns + remaining_idx = design.index + + design_remaining = design.loc[remaining_idx].copy() if not design_remaining.empty: - design_remaining["job_type"] = "design type not specified" + design_remaining["job_type"] = "Design Type Not Specified" filtered_dfs.append(design_remaining) - - - + # Design Revision revision_letter = ['a', 'b', 'c', 'd'] for letter in revision_letter: diff --git a/etl/month_end_automation_wave_accent_housing.py b/etl/month_end_automation_wave_accent_housing.py index 8e04c38..203ecaf 100644 --- a/etl/month_end_automation_wave_accent_housing.py +++ b/etl/month_end_automation_wave_accent_housing.py @@ -97,33 +97,42 @@ full_cost = get_df(df, "mtp invoicing status", ["(v1) full cost mtp to invoice ( if not full_cost.empty: filtered_dfs.append(full_cost) -# Design archetype complex +# Base filter design = get_df(df, "design invoicing status", ["to invoice"]) + +# Design archetype complex design1 = get_df(design, "design invoice type", ["archetype (complex)"], "Design Archetype Complex") -if not design1.empty : +if not design1.empty: filtered_dfs.append(design1) # Design archetype simple design2 = get_df(design, "design invoice type", ["archetype (simple)"], "Design Archetype Simple") -if not design1.empty: +if not design2.empty: filtered_dfs.append(design2) # Design repetitive simple -design3 = get_df(design, "design invoice type", ["repetitive (simple)"], "Design repetitive simple") -if not design1.empty: +design3 = get_df(design, "design invoice type", ["repetitive 
(simple)"], "Design Repetitive Simple") +if not design3.empty: filtered_dfs.append(design3) # Design repetitive complex -design4 = get_df(design, "design invoice type", ["repetitive (complex)"], "Design repetitive complex") -if not design1.empty: +design4 = get_df(design, "design invoice type", ["repetitive (complex)"], "Design Repetitive Complex") +if not design4.empty: filtered_dfs.append(design4) -# Design not specified -all_filtered = pd.concat([df for df in (design1, design2, design3, design4) if not df.empty]) -design_remaining = design.loc[~design.index.isin(all_filtered.index)] +# Safe concat +parts = [d for d in (design1, design2, design3, design4) if not d.empty] +if parts: + all_filtered = pd.concat(parts, ignore_index=False) # keep original indices + remaining_idx = design.index.difference(all_filtered.index) +else: + all_filtered = design.iloc[0:0] # empty DF with same schema + remaining_idx = design.index +# Design not specified +design_remaining = design.loc[remaining_idx].copy() if not design_remaining.empty: - design_remaining["job_type"] = "design type not specified" + design_remaining["job_type"] = "Design Type Not Specified" filtered_dfs.append(design_remaining) # Design Revision