"""Write a small orders table to Parquet and read it back.

The script builds a three-row ``orders`` DataFrame, writes it to
``orders.parquet`` with the pyarrow engine, then reads the file twice:
once in full and once restricted to a subset of columns.  It prints the
round-tripped table, its dtypes, and a few consistency checks.
"""

from pathlib import Path

import pandas as pd

# Relative path: the file is created in the current working directory and
# is left in place after the script finishes.
path = Path("orders.parquet")

orders = pd.DataFrame(
    {
        "order_id": ["A100", "A101", "A102"],
        "customer": ["Ada", "Lin", "Maya"],
        "region": ["EMEA", "APAC", "AMER"],
        "total_usd": [149.50, 88.00, 212.25],
    }
)

# index=False keeps the default RangeIndex out of the file, so only the
# four data columns are stored.
orders.to_parquet(
    path,
    engine="pyarrow",
    compression="snappy",
    index=False,
)

# Full read: every column written above.
round_trip = pd.read_parquet(path, engine="pyarrow")

# Projected read: only the requested columns are loaded from the file.
selected = pd.read_parquet(
    path,
    engine="pyarrow",
    columns=["order_id", "total_usd"],
)

print(round_trip.to_string(index=False))
print()
print(round_trip.dtypes)
print()
print(f"rows match: {len(round_trip) == len(orders)}")
print(f"columns: {', '.join(round_trip.columns)}")
print(f"selected columns: {', '.join(selected.columns)}")
print(f"order IDs match: {round_trip['order_id'].tolist() == orders['order_id'].tolist()}")