"""Demonstrate normalizing free-text columns of a small orders table.

The script builds a sample ``orders`` frame, prints it, derives a
``cleaned`` copy whose ``customer`` and ``status`` columns are trimmed,
normalized and have placeholder entries converted to ``pd.NA``, and then
prints a few verification summaries.
"""

import pandas as pd

# Placeholder spellings that stand for "no value". Entries are compared
# after case-folding, so "N/A", "n/a" and "N/a" are all covered, and a
# blank (already stripped) string counts as missing too.
MISSING_TOKENS = ("", "n/a")


def blank_out_placeholders(text):
    """Return *text* with placeholder entries replaced by ``pd.NA``.

    Args:
        text: A string-dtype Series whose values have already been
            stripped of surrounding whitespace.

    Returns:
        A Series of the same length in which every entry that
        case-insensitively equals one of ``MISSING_TOKENS`` is ``pd.NA``.
        All other entries, including existing missing values, are
        returned unchanged.
    """
    is_placeholder = text.str.casefold().isin(MISSING_TOKENS)
    return text.mask(is_placeholder, pd.NA)


orders = pd.DataFrame(
    {
        "order_id": ["A100", "A101", "A102", "A103", "A104"],
        "customer": [
            " Ada Lovelace ",
            "LIN CHEN",
            "Maya Patel",
            " n/a ",
            None,
        ],
        "status": [" Paid ", "PAID", " pending", "", pd.NA],
    }
)

print(f"pandas {pd.__version__}")
print()
print("source values")
print(orders.to_string(index=False))
print()

# Work on a copy so the source frame stays available for comparison.
cleaned = orders.copy()

# Customer names keep their original letter case; only surrounding
# whitespace is removed and internal whitespace runs are collapsed.
cleaned["customer"] = (
    cleaned["customer"]
    .astype("string")
    .str.strip()
    .str.replace(r"\s+", " ", regex=True)
    .pipe(blank_out_placeholders)
)

# Status values are case-folded so that " Paid " and "PAID" are counted
# as the same category.
cleaned["status"] = (
    cleaned["status"]
    .astype("string")
    .str.strip()
    .str.casefold()
    .pipe(blank_out_placeholders)
)

text_columns = ["customer", "status"]

print("cleaned values")
print(cleaned.to_string(index=False))
print()
print("verification")
print(f"rows before: {len(orders)}")
print(f"rows after: {len(cleaned)}")
print(f"customer dtype: {cleaned['customer'].dtype}")
print(f"status dtype: {cleaned['status'].dtype}")
print()
print("missing values")
print(cleaned[text_columns].isna().sum())
print()
print("status counts")
print(cleaned["status"].value_counts(dropna=False))