# Run the all_reduce_perf benchmark with NCCL_P2P_LEVEL=SYS.
# The variable is set as a command prefix, so it is exported to this one
# invocation only and does not persist in the calling shell.
#
# Flag meanings below follow the nccl-tests usage text -- confirm against the
# installed build:
#   -g 8     number of GPUs per thread
#   -b 1G    minimum message size
#   -e 1G    maximum message size (same as -b, so presumably one size is tested)
#   -n 100   number of iterations
NCCL_P2P_LEVEL=SYS \
  all_reduce_perf \
  -g 8 \
  -b 1G \
  -e 1G \
  -n 100
