"""Demonstrate pandas categorical columns on a small ticket table.

The script converts ``team`` to an unordered categorical, compares the
memory footprint of string labels against their categorical form, converts
``priority`` to an ordered categorical, sorts by that order, and finally
counts missing values in the converted columns.
"""

import pandas as pd
from pandas.api.types import CategoricalDtype

df = pd.DataFrame(
    {
        "ticket": [101, 102, 103, 104, 105, 106],
        "team": ["api", "frontend", "api", "ops", "frontend", "api"],
        "priority": ["normal", "urgent", "low", "normal", "low", "urgent"],
    }
)

print(f"pandas {pd.__version__}")
print()

print("source dtypes")
print(df.dtypes)
print()

# --- Unordered categorical: categories are taken from the observed values.
df["team"] = df["team"].astype("category")

print("team dtype")
print(df["team"].dtype)
print()

print("team categories")
print(df["team"].cat.categories)
print()

# --- Memory comparison: 4000 labels drawn from only three distinct values,
# measured once as strings and once after conversion to categorical.
label_sample = pd.Series(["api", "frontend", "ops", "api"] * 1000, dtype="str")
memory = pd.Series(
    {
        "str_bytes": label_sample.memory_usage(deep=True),
        "category_bytes": label_sample.astype("category").memory_usage(deep=True),
    }
)

print("memory check")
print(memory)
print()

# --- Ordered categorical: the category list is given explicitly so that
# sorting follows low < normal < urgent rather than alphabetical order.
priority_dtype = CategoricalDtype(
    categories=["low", "normal", "urgent"],
    ordered=True,
)
df["priority"] = df["priority"].astype(priority_dtype)

print("priority dtype")
print(df["priority"].dtype)
print()

print("priority categories")
print(df["priority"].cat.categories)
print()

print("priority ordered")
print(df["priority"].cat.ordered)
print()

print("sorted by priority")
print(df.sort_values("priority").filter(["ticket", "priority"]).to_string(index=False))
print()

# --- Sanity check: count missing values in the two converted columns.
print("missing after conversion")
print(df.filter(["team", "priority"]).isna().sum())