# Model/etl/customers/aiha/epc_data_pull.py

import os
from tqdm import tqdm
from dotenv import load_dotenv
import pandas as pd
import numpy as np
import msgpack
from utils.s3 import read_from_s3
from backend.SearchEpc import SearchEpc
from etl.spatial.OpenUprnClient import OpenUprnClient

# Load the backend's dotenv file, then read the EPC API token from the environment.
load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.environ.get("EPC_AUTH_TOKEN")

# Raise pandas' display limits for the frames this script prints.
pd.set_option("display.width", 1000)
pd.set_option("display.max_columns", 500)
pd.set_option("display.max_rows", 500)


# Default locations used when ``app`` is called without arguments.
_DEFAULT_AIHA_DIR = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA"
_DEFAULT_ASSET_LIST_FILENAME = (
    "Khalim Review - 240902 - KSQ - AIHA - SHDF Wave 3 bid - Supplementary information.xlsx"
)

# Hand-curated corrections: address as built from the asset list -> address to search for.
_ADDRESS_AMENDMENTS = {
    "12 11-18 Schonfeld Square": "12 Schonfeld Square",
    "35 35-37 Schonfeld Square": "35 Schonfeld Square",
    "77 Schonfeld Square": "77 Lordship Road",
    "83 Lordship Road (Schonfeld Square)": "83 Lordship Road",
    "A 80 Bethune Road": "80A Bethune Road",
    "86B Bethune Road": "Flat B, 86 Bethune Road",
    "22 Glendale Road": "22 Glendale Avenue",
    "121 Southbourne Road": "121 Southbourne Grove",
}

# Addresses that are skipped entirely when searching for an EPC.
_NO_EPC_ADDRESSES = (
    "80B Bethune Road",
    "89B Manor Road",
    "12 Monkville Avenue",
    "9 Greenview",
)

# Asset-list "Property type" -> EPC "property-type".
_PROPERTY_TYPE_MAP = {
    "House, mid-terrace": "House",
    "House, end terrace": "House",
    "House, semi-detached": "House",
    "House, detached": "House",
    "Flat": "Flat",
}

# Asset-list "Property type" -> EPC "built-form".
_BUILT_FORM_MAP = {
    "House, mid-terrace": "Mid-Terrace",
    "House, end terrace": "End-Terrace",
    "House, semi-detached": "Semi-Detached",
    "House, detached": "Detached",
    "Flat": "Flat",
}

# How each asset-list column is compared with the EPC record.
#   "map":          remap the asset value (if the map is non-empty), then compare for equality.
#   "search_terms": if the lower-cased asset value contains the key, the EPC description
#                   must contain the value. Later terms overwrite earlier ones.
_COMPARISON_COLS = {
    "Property type": [
        {"epc_col": "property-type", "map": _PROPERTY_TYPE_MAP},
        {"epc_col": "built-form", "map": _BUILT_FORM_MAP},
    ],
    "Energy starting band (EPC)": [
        {"epc_col": "current-energy-rating", "map": {}},
    ],
    "Wall type": [
        {
            "epc_col": "walls-description",
            "search_terms": {
                "solid": "Solid brick",
                "cavity": "Cavity wall",
                "solid - internal lining": "Solid brick",
            },
        },
    ],
    "Roof type": [
        {
            "epc_col": "roof-description",
            "search_terms": {
                "pitched": "Pitched",
                "n/a - (flat above)": "another dwelling above",
            },
        },
    ],
    "Floor type": [
        {
            "epc_col": "floor-description",
            "search_terms": {
                "solid": "Solid",
                "suspended": "Suspended",
                "solid - floating floor for services": "Solid",
            },
        },
    ],
}

# Proposed corrections to the asset list based on discoveries, keyed by UPRN.
# (UPRNs were looked up by filtering the asset list on street address and reading
# the matching row's "uprn" from the EPC data.)
_VALUE_REMAPS = {
    # 8 Filey Avenue
    "100021040744": {"variable": "Property type", "newvalue": "House, mid-terrace"},
    # 7 Yetev Lev Court
    "100021032043": {"variable": "Wall type", "newvalue": "Cavity"},
    # 14 Yetev Lev Court
    "100021032050": {"variable": "Wall type", "newvalue": "Cavity"},
    # 23 Yetev Lev Court
    "100021032059": {"variable": "Wall type", "newvalue": "Cavity"},
    # 30 Yetev Lev Court
    "100021032066": {"variable": "Wall type", "newvalue": "Cavity"},
    # 34 Yetev Lev Court
    "100021032070": {"variable": "Wall type", "newvalue": "Cavity"},
    # B 86 Bethune Road
    "100021026285": {"variable": "Wall type", "newvalue": "Solid"},
    # A 80 Bethune Road
    "100021026277": {"variable": "Wall type", "newvalue": "Solid"},
    # 140 Kyverdale Road
    "100021052262": {"variable": "Property type", "newvalue": "House, mid-terrace"},
    # 6 Leabourne Road
    "100021053799": {"variable": "Wall type", "newvalue": "Solid"},
    # 22 Britannia Gardens - needs confirmation
    # 7 Satanita Road - needs confirmation
    # 12 Cheltenham Crescent
    "100011402969": {"variable": "Wall type", "newvalue": "Cavity"},
    "100021031752": {"variable": "Roof type", "newvalue": "Room Roof"},
    # 79 Craven Park Road
    "100021169682": {"variable": "Roof type", "newvalue": "Room Roof"},
    # 88 Darenth Road
    "100021036148": {"variable": "Roof type", "newvalue": "Room Roof"},
    "100021036165": {"variable": "Roof type", "newvalue": "Room Roof"},
    "100021036167": {"variable": "Roof type", "newvalue": "Room Roof"},
    "100021053849": {"variable": "Roof type", "newvalue": "Room Roof"},
    "100021054353": {"variable": "Roof type", "newvalue": "Room Roof"},
    "100021054560": {"variable": "Roof type", "newvalue": "Room Roof"},
    "100021059839": {"variable": "Roof type", "newvalue": "Room Roof"},
    "100021059848": {"variable": "Roof type", "newvalue": "Room Roof"},
}

# Local authority -> region. Anything not listed here is treated as Southend.
# Hackney            73 - London
# Southend-on-Sea     6 - Southend
# Barnet              4 - London
# Castle Point        4 - Southend
# Haringey            3 - London
# Salford             2 - Manchester
# Bury                1 - Manchester
_REGION_BY_LOCAL_AUTHORITY = {
    "Hackney": "London",
    "Barnet": "London",
    "Haringey": "London",
    "Salford": "Manchester",
    "Bury": "Manchester",
}

# Last build year of each age band A..K; anything later falls into band L.
_AGE_BAND_LAST_YEARS = (1899, 1929, 1949, 1966, 1975, 1982, 1990, 1995, 2002, 2006, 2011)
_AGE_BAND_LETTERS = "ABCDEFGHIJKL"


def _build_address(home):
    """Build the EPC search address for one asset-list row.

    Returns ``(address, backup_address)``. ``backup_address`` is ``None`` unless the
    row is a lettered flat (A/B/C), in which case it is the "Flat X <no> <street>"
    spelling to try when the primary "<no>X <street>" spelling finds nothing.
    """
    unit = home["Address letter or number"]
    street_address = home["Street address"]
    backup = None

    if unit in ("A", "B", "C"):
        house_no, _, street = street_address.partition(" ")
        address = f"{house_no}{unit} {street}"
        backup = f"Flat {unit} {house_no} {street}"
    else:
        address = ""
        if not pd.isnull(unit):
            address += f"{unit} "
        if not pd.isnull(street_address):
            address += f"{street_address}"
    address = address.strip()

    # Expand a trailing "Rd" only. The rest of the address keeps its case so the
    # case-sensitive amendment / no-EPC lookups below still match.
    tokens = address.split(" ")
    if tokens[-1].lower() == "rd":
        tokens[-1] = "Road"
        address = " ".join(tokens)

    return _ADDRESS_AMENDMENTS.get(address, address), backup


def _search_epc(address, postcode, property_type):
    """Run an EPC-only search (``skip_os=True``) for one address and return the searcher."""
    searcher = SearchEpc(
        address1=address,
        postcode=postcode,
        auth_token=EPC_AUTH_TOKEN,
        os_api_key="",
        property_type=property_type,
    )
    searcher.find_property(skip_os=True)
    return searcher


def _compare_with_epc(data, epc_data):
    """Compare asset-list columns with the EPC data.

    Returns a dict keyed by EPC column name. Each value is the frame of rows where
    the asset list and the EPC disagree, and is empty when everything agrees, so
    callers can look results up by name instead of by position.

    Raises ``ValueError`` if an asset-list wall/roof/floor value matches none of the
    configured search terms.
    """
    differences = {}
    for asset_col, configs in _COMPARISON_COLS.items():
        for config in configs:
            epc_col = config["epc_col"]
            asset_values = data[["row_id", asset_col]].copy()

            if "search_terms" in config:
                # Strip and lower-case the asset value in case of stray spaces/case.
                asset_values[asset_col] = asset_values[asset_col].str.strip().str.lower()
                remapped = asset_values.merge(epc_data[["row_id", epc_col]], on="row_id", how="inner")
                remapped["Match"] = None
                for search_term, epc_term in config["search_terms"].items():
                    # Literal matching: terms such as "n/a - (flat above)" contain
                    # regex metacharacters.
                    applies = remapped[asset_col].str.contains(search_term, regex=False, na=False)
                    remapped.loc[applies, "Match"] = (
                        remapped.loc[applies, epc_col].str.contains(epc_term, regex=False)
                    )
                if remapped["Match"].isnull().any():
                    raise ValueError(f"Not all matched: unrecognised values in {asset_col!r}")
                remapped["Match"] = remapped["Match"].astype(bool)
            else:
                if config["map"]:
                    asset_values[asset_col] = asset_values[asset_col].map(config["map"])
                remapped = asset_values.merge(epc_data[["row_id", epc_col]], on="row_id", how="inner")
                remapped["Match"] = remapped[asset_col] == remapped[epc_col]

            differences[epc_col] = remapped[~remapped["Match"]]
    return differences


def _attach_address(frame, data, epc_data=None):
    """Add the asset-list address columns (and the UPRN when ``epc_data`` is given) to ``frame``."""
    enriched = frame.merge(
        data[["row_id", "Address letter or number", "Street address"]], on="row_id", how="inner"
    )
    if epc_data is not None:
        enriched = enriched.merge(epc_data[["row_id", "uprn"]], on="row_id", how="inner")
    return enriched


def _description_lookup(cleaned, key, columns, renames):
    """Return the cleaned-description lookup table for ``key`` with the chosen columns renamed."""
    return pd.DataFrame(cleaned[key])[columns].rename(columns=renames)


def _categorize_year(year):
    """Map a build year (number, or text such as ``'1930'`` / ``'1930s'``) to an age band letter.

    Missing years return ``"Unknown"`` instead of raising.
    """
    import bisect

    if pd.isnull(year):
        return "Unknown"
    if isinstance(year, str):
        text = year.strip()
        # Decade notation such as '1930s': take the leading four digits.
        year = int(text[:4]) if "s" in text else int(text)
    else:
        year = int(year)
    return _AGE_BAND_LETTERS[bisect.bisect_left(_AGE_BAND_LAST_YEARS, year)]


def _stable_archetype_hash(values):
    """Return a 64-bit signed hash of ``values`` that is identical across interpreter runs.

    The built-in ``hash`` is salted per process for strings, which made the archetype
    ordering (and therefore ``archetype_id``) change from run to run.
    """
    import hashlib

    payload = "\x1f".join(str(value) for value in values)
    digest = hashlib.sha256(payload.encode("utf-8")).digest()
    return int.from_bytes(digest[:8], "big", signed=True)


def app(asset_list_path=None, output_dir=None):
    """Retrieve EPC data for the SHDF AIHA portfolio and draft property archetypes.

    Steps:
      1. Read the asset list and look up the newest EPC for each property.
      2. Print the differences between the asset list and the EPC data.
      3. Pull spatial data for the matched UPRNs and print conservation-area mismatches.
      4. Join cleaned wall/roof/floor description features from S3, derive the
         archetyping variables, assign archetype ids and pick one representative
         property per archetype.
      5. Write ``basic-archetypes.csv``, ``epc_data.csv``, ``spatial_data.csv`` and
         ``archetyping_data.csv`` to ``output_dir``.

    Args:
        asset_list_path: Path of the asset-list workbook. Defaults to the original
            hard-coded location.
        output_dir: Directory the CSV outputs are written to. Defaults to the
            original hard-coded location.

    Raises:
        LookupError: no EPC was found for a property that is not on the skip list.
        ValueError: an asset-list wall/roof/floor value matched no search term, or
            the archetyping joins changed the number of rows.
    """
    if asset_list_path is None:
        asset_list_path = os.path.join(_DEFAULT_AIHA_DIR, _DEFAULT_ASSET_LIST_FILENAME)
    if output_dir is None:
        output_dir = _DEFAULT_AIHA_DIR

    data = pd.read_excel(asset_list_path, sheet_name="All units information", header=3)

    # Remove the example ("e.g.") row at the top and the bottom two rows.
    data = data.tail(-1).head(-2).reset_index(drop=True)
    data["row_id"] = data.index

    ################################################################
    # EPC lookup
    ################################################################

    epc_rows = []
    epc_metadata_rows = []
    for _, home in tqdm(data.iterrows(), total=len(data)):
        address1, address1_backup = _build_address(home)
        if address1 in _NO_EPC_ADDRESSES:
            continue

        postcode = home["Postcode"]
        property_type = _PROPERTY_TYPE_MAP[home["Property type"]]

        matched_address = address1
        searcher = _search_epc(address1, postcode, property_type)
        if searcher.newest_epc is None and address1_backup is not None:
            # Lettered flats: retry with the "Flat X <no> <street>" spelling.
            matched_address = address1_backup
            searcher = _search_epc(address1_backup, postcode, property_type)

        if searcher.newest_epc is None:
            raise LookupError(f"Not found: no EPC for {address1!r}, {postcode}")

        epc_rows.append({"row_id": home["row_id"], **searcher.newest_epc})

        searcher.get_metadata()
        epc_metadata_rows.append(
            {
                "row_id": home["row_id"],
                # Record the spelling that actually produced the match.
                "address": matched_address,
                "postcode": postcode,
                **searcher.metadata,
            }
        )

    epc_metadata = pd.DataFrame(epc_metadata_rows)
    epc_data = pd.DataFrame(epc_rows)

    # Check matched addresses
    matched_addresses = epc_metadata[["row_id", "address", "postcode"]].merge(
        data[["row_id", "Address letter or number", "Street address"]], on="row_id", how="inner"
    )
    print(matched_addresses)

    ################################################################
    # Differences between the asset list and the EPC data
    ################################################################

    differences = _compare_with_epc(data, epc_data)

    # Check for property type
    print(_attach_address(differences["property-type"], data))

    # Check for built form. Flats are excluded from this report.
    built_form_differences = differences["built-form"]
    built_form_differences = built_form_differences[built_form_differences["Property type"] != "Flat"]
    print(_attach_address(built_form_differences, data))

    # Check for energy rating
    print(_attach_address(differences["current-energy-rating"], data, epc_data))

    # Check for wall type
    print(_attach_address(differences["walls-description"], data, epc_data))  # Many wall type differences

    # Check for roof type
    print(_attach_address(differences["roof-description"], data, epc_data))  # Many roof type differences

    # Check for floor type
    print(_attach_address(differences["floor-description"], data, epc_data))  # Many floor type differences

    # TODO: 47 Ashtead Road [100021024699] shows solid brick wall on EPC - is probably cavity wall

    ################################################################
    # Conservation area / historic / listed building status
    ################################################################

    portfolio_spatial_data = OpenUprnClient.get_spatial_data(
        epc_data["uprn"].unique().tolist(), bucket_name="retrofit-data-dev"
    )
    portfolio_spatial_data["UPRN"] = portfolio_spatial_data["UPRN"].astype(str)

    spatial_data = data[["row_id", "Planning constraints"]].merge(
        epc_data[["row_id", "uprn"]], on="row_id", how="left"
    ).merge(
        portfolio_spatial_data[["UPRN", "conservation_status", "is_listed_building", "is_heritage_building"]],
        left_on="uprn",
        right_on="UPRN",
        how="left"
    )

    no_constraints = spatial_data["Planning constraints"] == "None"
    print(spatial_data[no_constraints]["conservation_status"].value_counts())

    # One property is in a conservation area, that was not picked up in the asset data
    print(
        spatial_data[no_constraints & (spatial_data["conservation_status"] == True)].merge(  # noqa: E712
            data[["row_id", "Address letter or number", "Street address", "Postcode"]], on="row_id", how="left"
        )
    )

    # All properties match up apart from one where the asset data indicates it's in a conservation area, however
    # the spatial data indicates it's not. There do not appear to be any listed/heritage buildings in the portfolio

    ################################################################
    # Draft archetyping
    ################################################################

    cleaned = read_from_s3(
        s3_file_name="cleaned_epc_data/cleaned.bson",
        bucket_name="retrofit-data-dev"
    )
    cleaned = msgpack.unpackb(cleaned, raw=False)

    wall_lookup = _description_lookup(
        cleaned,
        "walls-description",
        [
            'original_description',
            'is_cavity_wall', 'is_filled_cavity', 'is_solid_brick', 'is_system_built', 'is_timber_frame',
            'is_as_built', 'is_assumed', 'insulation_thickness'
        ],
        {
            "is_solid_brick": "is_solid_brick_wall",
            "is_system_built": "is_system_built_wall",
            "is_timber_frame": "is_timber_frame_wall",
            "is_assumed": "is_assumed_wall",
            "insulation_thickness": "insulation_thickness_wall"
        },
    )
    roof_lookup = _description_lookup(
        cleaned,
        "roof-description",
        [
            'original_description', 'is_pitched', 'is_roof_room', 'is_loft',
            'is_flat', 'is_thatched', 'is_at_rafters', 'is_assumed',
            'has_dwelling_above', 'insulation_thickness'
        ],
        {"is_assumed": "is_assumed_roof"},
    )
    floor_lookup = _description_lookup(
        cleaned,
        "floor-description",
        [
            'original_description', 'is_solid', 'is_suspended', 'is_assumed',
            'insulation_thickness'
        ],
        {
            "is_assumed": "is_assumed_floor",
            "insulation_thickness": "insulation_thickness_floor"
        },
    )

    # NOTE(review): these are inner joins, so an EPC whose description is missing
    # from the cleaned lookup is dropped from epc_data here - confirm this is intended.
    epc_data = epc_data.merge(
        wall_lookup, left_on="walls-description", right_on="original_description", how="inner"
    ).merge(
        roof_lookup, left_on="roof-description", right_on="original_description", how="inner"
    ).merge(
        floor_lookup, left_on="floor-description", right_on="original_description", how="inner"
    )

    archetyping_data = data[
        [
            "row_id",
            "Energy starting band (EPC)",
            "Property type",
            "Property year built",
            "Gross internal area (sqm)",
            "Current heating system type",
            "Wall type",
            "Floor type",
            "Roof type",
            "Window type",
            "Location (Floor)",
        ]
    ].merge(
        epc_metadata[["row_id", "floor"]],
        how="left",
        on="row_id"
    ).merge(
        epc_data[
            [
                "row_id", "uprn", "current-energy-rating", "property-type", "built-form", "total-floor-area",
                'is_cavity_wall', 'is_filled_cavity', 'is_solid_brick_wall', 'is_system_built_wall',
                'is_timber_frame_wall', 'is_as_built', 'is_assumed_wall', 'insulation_thickness_wall',
                'is_solid', 'is_suspended', 'is_assumed_floor', 'insulation_thickness_floor',
                'is_pitched', 'is_roof_room', 'is_loft',
                'is_flat', 'is_thatched', 'is_at_rafters', 'is_assumed_roof',
                'has_dwelling_above', 'insulation_thickness', "mainheat-description",
                "local-authority-label"
            ]
        ],
        how="left",
        on="row_id"
    ).merge(
        spatial_data[["row_id", "conservation_status"]],
        on="row_id",
        how="left"
    )

    # The joins above must neither drop nor duplicate asset-list rows.
    if archetyping_data.shape[0] != data.shape[0]:
        raise ValueError(
            f"Mismatch in data: {archetyping_data.shape[0]} archetyping rows for {data.shape[0]} asset rows"
        )

    # We create groups analogous to the Energy Company Obligation
    # 0 - 72, 73 - 97, 98 - 199, 200+
    band_edges = [0, 72, 97, 199, 1000]
    band_labels = ["0-72", "73-97", "98-199", "200+"]
    asset_band = pd.cut(archetyping_data["Gross internal area (sqm)"], bins=band_edges, labels=band_labels)
    epc_band = pd.cut(
        archetyping_data["total-floor-area"].astype(float), bins=band_edges, labels=band_labels
    )
    # Prefer the asset-list area and fall back to the EPC area. Label missing bands
    # BEFORE converting to text, otherwise NaN becomes the literal string "nan".
    floor_area_band = asset_band.fillna(epc_band)
    archetyping_data["Floor_area_category"] = (
        floor_area_band.astype(object).where(floor_area_band.notna(), "Unknown")
    )

    archetyping_data["property-type-reduced"] = np.where(
        archetyping_data["property-type"].isin(["Flat", "Maisonette"]),
        "Flat/Maisonette",
        archetyping_data["property-type"]
    )

    archetyping_data["built-form-reduced"] = np.where(
        archetyping_data["built-form"].isin(["End-Terrace", "Semi-Detached"]),
        "End-Terrace/Semi-Detached",
        archetyping_data["built-form"]
    )
    archetyping_data["built-form-reduced"] = np.where(
        archetyping_data["property-type-reduced"] == "Flat/Maisonette",
        "Flat/Maisonette",
        archetyping_data["built-form-reduced"]
    )

    # Collapse the wall-type spellings, ignoring stray whitespace and case.
    # Values that are not recognised are left exactly as they were.
    wall_labels = {"solid": "Solid", "solid - internal lining": "Solid", "cavity": "Cavity"}
    normalised_wall = archetyping_data["Wall type"].str.strip().str.lower().map(wall_labels)
    archetyping_data["Wall type"] = normalised_wall.fillna(archetyping_data["Wall type"])

    # Perform the remaps
    uprn_text = archetyping_data["uprn"].astype(str)
    for uprn, remap in _VALUE_REMAPS.items():
        archetyping_data.loc[uprn_text == uprn, remap["variable"]] = remap["newvalue"]

    # Map the year to the age band
    archetyping_data["SAP_age_band"] = archetyping_data["Property year built"].apply(_categorize_year)

    # Flag if the property is in London/Manchester (everything else is Southend)
    archetyping_data["Location"] = (
        archetyping_data["local-authority-label"].map(_REGION_BY_LOCAL_AUTHORITY).fillna("Southend")
    )
    # 9 Greenview is in manchester
    greenview_rows = data.loc[data["Street address"] == "9 Greenview", "row_id"]
    archetyping_data.loc[archetyping_data["row_id"].isin(greenview_rows), "Location"] = "Manchester"
    # We fix the location for B 80 Bethune Road
    bethune_b_rows = data.loc[
        (data["Street address"] == "80 Bethune Road") & (data["Address letter or number"] == "B"),
        "row_id"
    ]
    archetyping_data.loc[archetyping_data["row_id"].isin(bethune_b_rows), "Location"] = "London"

    primary_archetyping_cols = [
        'Property type',
        "Location (Floor)",
        'Current heating system type',
        'Wall type',
        'Roof type',
        # "Location",
        # 'current-energy-rating', 'property-type-reduced', 'built-form-reduced', 'is_cavity_wall',
        # 'is_solid_brick_wall', 'is_system_built_wall', 'is_timber_frame_wall', 'is_as_built',
        # 'is_solid', 'is_roof_room',
        # 'is_loft', 'is_flat', 'is_thatched',
        # 'is_at_rafters', 'has_dwelling_above',
        # 'conservation_status',
    ]

    # Candidate secondary dimensions. Not used for grouping yet; kept as a record
    # of intent.
    secondary_cols = [  # noqa: F841
        'SAP_age_band',
        'is_filled_cavity',
        'insulation_thickness_wall',
        'insulation_thickness_floor',
        'insulation_thickness',
        'is_assumed_wall',
        'is_assumed_roof',
        'Floor_area_category',
    ]

    archetypes = archetyping_data[primary_archetyping_cols].drop_duplicates()
    # Hash the variables with a run-stable hash so archetype ids are reproducible
    archetypes["archetype_hash"] = archetypes.apply(
        lambda row: _stable_archetype_hash(row.values),
        axis=1
    )
    archetypes = archetypes.sort_values("archetype_hash", ascending=True)
    archetypes = archetypes.reset_index(drop=True)
    archetypes["archetype_id"] = archetypes.index

    archetypes.to_csv(os.path.join(output_dir, "basic-archetypes.csv"), index=False)

    # We match properties to archetypes
    archetyping_data = archetyping_data.merge(
        archetypes,
        on=primary_archetyping_cols,
        how="left"
    )

    # We should choose a representative property for each archetype
    archetyping_data = archetyping_data.merge(
        epc_metadata[["row_id", "days_since_last_epc"]],
        how="left",
        on="row_id"
    )

    # Mark the property with the oldest EPC as the representative property
    representative_properties = archetyping_data.sort_values(
        ["archetype_id", "days_since_last_epc"], ascending=[True, False]
    ).drop_duplicates("archetype_id")
    archetyping_data["for_sample"] = archetyping_data["row_id"].isin(representative_properties["row_id"])

    ################################################################
    # Outputs
    ################################################################

    address_cols = data[["row_id", "Address letter or number", "Street address", "Postcode"]]

    # Save the EPC data
    epc_data.to_csv(os.path.join(output_dir, "epc_data.csv"), index=False)

    # Save the spatial data
    spatial_data = address_cols.merge(spatial_data, on="row_id", how="left")
    spatial_data.to_csv(os.path.join(output_dir, "spatial_data.csv"), index=False)

    # Save archetyping data (written once, with the address columns attached)
    archetyping_data = address_cols.merge(archetyping_data, on="row_id", how="left")
    archetyping_data.to_csv(os.path.join(output_dir, "archetyping_data.csv"), index=False)