---
# Experiment settings, grouped by what the key names suggest.
# NOTE(review): the code that consumes this file is not visible here, so the
# stanza descriptions below are inferred from names -- confirm against the loader.

# Model dimensions (presumably the MoE model referenced by the data paths).
num_layers: 27
num_experts: 64

# Cluster size and per-server placement limits.
num_servers: 32
max_experts_per_server: 128
max_layers_per_server: 2
max_layer_experts_per_server: 4

# Network topology. Exactly one `topology_type` should be active; the
# commented-out lines are alternatives kept for quick switching.
# topology_type: 'dragonfly'
# topology_type: 'dragonfly_sparse'
# topology_type: 'fat_tree'
topology_type: 'fat_tree_2_level'
num_nodes_per_leaf: 1
num_gpus_per_server: 1

# Input data files.
# Per the original note, comment out `train_per_layer_stats_path` to ignore it.
# NOTE(review): confirm the loader tolerates this key being absent.
train_per_layer_stats_path: 'get_deepseek_stats/16b_1000_0.2/train_stats.pt'
test_activations_path: 'get_deepseek_stats/16b_1000_0.2/test_routing_data.pt'
# Alternative test data set (raw routing data), kept for quick switching:
# test_activations_path: 'get_deepseek_stats/16b_1000/deepseek_ai_deepseek_moe_16b_chat_raw_routing_data.pt'