"""Report how much memory a small orders DataFrame saves after dtype tuning.

The script builds a synthetic orders table, records its deep memory usage,
then makes a copy whose string columns are stored as ``category`` and whose
numeric columns are passed through ``pd.to_numeric(..., downcast=...)``.
It prints the before/after byte counts, the percentage reduction, the
resulting dtypes, and a few sanity checks on the optimized copy.
"""

import pandas as pd


def cycle_to_length(pattern, length):
    """Return a list that repeats *pattern* until it has exactly *length* items.

    Repeating with a plain ``pattern * (length // len(pattern))`` only yields
    the right size when *length* is a multiple of ``len(pattern)``; this
    rounds the repeat count up and trims the surplus so every column matches
    the row count.
    """
    repeats = -(-length // len(pattern))  # ceiling division
    return (pattern * repeats)[:length]


rows = 10000

# Synthetic source data: every column has exactly `rows` entries.
df = pd.DataFrame(
    {
        "order_id": range(100000, 100000 + rows),
        "region": cycle_to_length(["EMEA", "APAC", "AMER", "EMEA"], rows),
        "priority": cycle_to_length(["low", "normal", "urgent", "normal"], rows),
        "quantity": cycle_to_length([1, 2, 3, 4], rows),
        "revenue": cycle_to_length([149.95, 89.50, 212.25, 65.00], rows),
    }
)

# deep=True so the string payloads are counted, not just the pointers.
before = df.memory_usage(deep=True).sum()

# Work on a copy so `df` stays available for the comparisons printed below.
optimized = df.copy()

# Columns with only a handful of distinct labels are stored as categories.
optimized["region"] = optimized["region"].astype("category")
optimized["priority"] = optimized["priority"].astype("category")

# Let pandas pick a smaller numeric dtype for each numeric column.
optimized["order_id"] = pd.to_numeric(optimized["order_id"], downcast="unsigned")
optimized["quantity"] = pd.to_numeric(optimized["quantity"], downcast="unsigned")
# NOTE: a narrower float dtype can change the stored values slightly; the
# "total revenue difference" section below reports the resulting gap.
optimized["revenue"] = pd.to_numeric(optimized["revenue"], downcast="float")

after = optimized.memory_usage(deep=True).sum()
percent = (1 - after / before) * 100

print(f"pandas {pd.__version__}")
print()

print("source memory bytes")
print(before)
print()

print("optimized memory bytes")
print(after)
print()

print("memory reduction")
print(f"{percent:.1f}%")
print()

print("optimized dtypes")
print(optimized.dtypes)
print()

print("row count preserved")
print(len(df) == len(optimized))
print()

print("key rows preserved")
print(
    optimized.iloc[:3]
    .filter(["order_id", "region", "priority", "quantity"])
    .to_string(index=False)
)
print()

print("total revenue difference")
print(f"{abs(df['revenue'].sum() - optimized['revenue'].sum()):.6f}")