"""Demonstrate left and inner merges of orders against customers.

Builds two small in-memory frames, joins them on ``customer_id`` twice
(``how="left"`` with a merge indicator, then ``how="inner"``), and prints
each result followed by a few sanity figures.
"""

import pandas as pd

# Four orders; customer "C004" has no row in ``customers`` below.
orders = pd.DataFrame(
    {
        "order_id": [1001, 1002, 1003, 1004],
        "customer_id": ["C001", "C002", "C001", "C004"],
        "total_usd": [125.0, 240.0, 90.0, 410.0],
    }
)

# Three customers; "C003" is not referenced by any order above.
customers = pd.DataFrame(
    {
        "customer_id": ["C001", "C002", "C003"],
        "segment": ["SMB", "Enterprise", "SMB"],
        "region": ["EMEA", "APAC", "AMER"],
    }
)

# validate="many_to_one" makes pandas raise MergeError when the right-hand
# keys are not unique, so a duplicated customer cannot silently multiply
# order rows. indicator=True adds the "_merge" column that records where
# each row's key was found.
left_join = orders.merge(
    customers,
    on="customer_id",
    how="left",
    validate="many_to_one",
    indicator=True,
)

# Rows flagged "left_only" are orders whose customer_id matched no customer.
unmatched_orders = left_join.loc[
    left_join["_merge"] == "left_only",
    ["order_id", "customer_id"],
]

# Same key and uniqueness check, but only orders with a matching customer
# are kept.
inner_join = orders.merge(
    customers,
    on="customer_id",
    how="inner",
    validate="many_to_one",
)

print(f"pandas {pd.__version__}")
print()

print("LEFT_MERGE")
print(left_join.to_string(index=False))
print()

print("UNMATCHED_ORDERS")
print(unmatched_orders.to_string(index=False))
print()

print("LEFT_VERIFY")
print(f"rows={len(left_join)}")
print(f"unmatched_orders={len(unmatched_orders)}")
print(left_join["_merge"].value_counts())
print()

print("INNER_MERGE")
print(inner_join.to_string(index=False))
print()

print("INNER_VERIFY")
print(f"rows={len(inner_join)}")
print(f"all_segments_present={inner_join['segment'].notna().all()}")
print()

print("SOURCE_VERIFY")
print(f"orders rows unchanged={len(orders)}")
print(f"customer keys unique={customers['customer_id'].is_unique}")