"""Group a small in-memory orders table by region and channel and verify it.

The script builds an ``orders`` DataFrame, aggregates it per
(region, channel) pair, checks that the grouped result is consistent with
the source rows, and prints the summary together with the verification
figures.
"""

import pandas as pd

# Source rows: one row per order.
orders = pd.DataFrame(
    {
        "region": [
            "East",
            "East",
            "East",
            "West",
            "West",
            "West",
            "North",
            "North",
        ],
        "channel": [
            "online",
            "store",
            "online",
            "online",
            "store",
            "store",
            "online",
            "partner",
        ],
        "order_id": [1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008],
        "sales_usd": [1200, 750, 980, 1430, 1020, 650, 500, 780],
        "units": [12, 8, 10, 14, 9, 5, 4, 7],
    }
)

group_columns = ["region", "channel"]

# sort=False keeps groups in order of first appearance in ``orders``;
# as_index=False keeps the group keys as ordinary columns of the result.
summary = orders.groupby(group_columns, as_index=False, sort=False).agg(
    order_count=("order_id", "count"),
    total_sales_usd=("sales_usd", "sum"),
    avg_order_usd=("sales_usd", "mean"),
    total_units=("units", "sum"),
)
summary["avg_order_usd"] = summary["avg_order_usd"].round(2)

# Figures used to cross-check the grouped result against the source rows.
source_total = orders["sales_usd"].sum()
summary_total = summary["total_sales_usd"].sum()
expected_groups = len(orders.loc[:, group_columns].drop_duplicates())

# Explicit raises instead of ``assert`` statements: ``assert`` is removed
# under ``python -O``, which would let the "verification" line below print
# without anything having been verified. AssertionError is kept as the
# exception type so a failing check surfaces the same way as before.
if len(summary) != expected_groups:
    raise AssertionError(
        f"expected {expected_groups} group rows, got {len(summary)}"
    )
if source_total != summary_total:
    raise AssertionError(
        f"grouped sales total {summary_total} does not match "
        f"source total {source_total}"
    )

print(f"pandas {pd.__version__}")
print()
print("GROUPED_SALES")
print(summary.to_string(index=False))
print()
print("VERIFY_TOTALS")
print(f"source rows: {len(orders)}")
print(f"group rows: {len(summary)}")
print(f"source sales: {source_total}")
print(f"summary sales: {summary_total}")
print("verification: grouped totals match source rows")