mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
57 lines
1.6 KiB
Python
57 lines
1.6 KiB
Python
"""Read a file and return unique values from a chosen column."""
|
|
|
|
from pathlib import Path
|
|
import argparse
|
|
import sys
|
|
|
|
import pandas as pd
|
|
|
|
|
|
def read_file(path: str | Path) -> pd.DataFrame:
|
|
path = Path(path)
|
|
suffix = path.suffix.lower()
|
|
if suffix == ".csv":
|
|
return pd.read_csv(path)
|
|
if suffix == ".tsv":
|
|
return pd.read_csv(path, sep="\t")
|
|
if suffix in {".xlsx", ".xls"}:
|
|
return pd.read_excel(path)
|
|
if suffix == ".parquet":
|
|
return pd.read_parquet(path)
|
|
if suffix == ".json":
|
|
return pd.read_json(path)
|
|
raise ValueError(f"Unsupported file type: {suffix}")
|
|
|
|
|
|
def get_unique(path: str | Path, column: str, dropna: bool = True) -> list:
    """Return the distinct values found in one column of a file.

    Args:
        path: File to load; it is passed to ``read_file``.
        column: Name of the column to inspect.
        dropna: When true (the default), missing values are removed before
            the distinct values are collected.

    Returns:
        A list of the column's unique values, as given by
        ``Series.unique().tolist()``.

    Raises:
        KeyError: If ``column`` is not among the loaded frame's columns.
    """
    df = read_file(Path(path))

    if column in df.columns:
        selected = df[column]
        if dropna:
            selected = selected.dropna()
        return selected.unique().tolist()

    raise KeyError(f"Column {column!r} not found. Available: {list(df.columns)}")
|
|
|
|
|
|
def main() -> int:
    """Command-line entry point: print a column's unique values, or list columns.

    Options:
        --path     File to load; it is passed to ``read_file``.
        --column   Column whose unique values are printed, one per line.
                   Defaults to ``walls_description``. Passing the flag with
                   no value (or an empty value) lists the available columns
                   instead.
        --keep-na  Keep missing values rather than dropping them.

    Unrecognised command-line arguments are ignored.

    Returns:
        0 on success, 1 when the requested column is not present in the file.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--path", default="/workspaces/model/certificates-2026.csv")
    # ``nargs`` must be one of argparse's specifiers; a column name there makes
    # add_argument raise ValueError. The name is the default value instead, and
    # ``nargs="?"`` lets a bare ``--column`` yield None so the listing branch
    # below stays reachable.
    parser.add_argument("--column", nargs="?", default="walls_description")
    parser.add_argument("--keep-na", action="store_true")
    args, _ = parser.parse_known_args()

    df = read_file(args.path)

    if not args.column:
        print("Available columns:")
        for c in df.columns:
            print(f"  - {c}")
        return 0

    # Use the column requested on the command line, not a hard-coded name.
    column = args.column
    if column not in df.columns:
        print(
            f"Column {column!r} not found. Available: {list(df.columns)}",
            file=sys.stderr,
        )
        return 1

    series = df[column] if args.keep_na else df[column].dropna()
    for value in series.unique():
        print(value)
    return 0
|
|
|
|
|
|
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|