# Appears to configure an experiment that places model experts onto servers
# in a network topology, driven by recorded routing statistics.
# NOTE(review): the meanings of the keys below are inferred from their names
# only; confirm against the code that loads this file.

# Model / cluster size.
num_layers: 58
num_experts: 256
num_servers: 256

# Per-server limits. The commented line is an alternative value for
# max_experts_per_server; keep only one uncommented.
max_experts_per_server: 64
# max_experts_per_server: 256
max_layers_per_server: 4
# presumably a cap on experts of a single layer per server -- TODO confirm
max_layer_experts_per_server: 1

# Network topology. Exactly one topology_type line should be uncommented;
# the commented lines list the other values seen in this file.
# topology_type: "dragonfly"
# topology_type: "dragonfly_sparse"
topology_type: "fat_tree"
# topology_type: "fat_tree_2_level"
num_nodes_per_leaf: 4
num_gpus_per_server: 4

# Input data files (.pt). Paths are relative -- presumably to the directory
# the consumer is run from; verify.
# Optional: comment out the train_per_layer_stats_path line to ignore it.
train_per_layer_stats_path: "get_deepseek_stats/r1_train_100/QuixiAI_DeepSeek-R1-AWQ_stats_matrix_train_100.pt"
# The commented test_activations_path is an alternative data set; keep only
# one uncommented.
test_activations_path: "get_deepseek_stats/r1_test_50/QuixiAI_DeepSeek-R1-AWQ_raw_routing_data_test_50.pt"
# test_activations_path: "get_deepseek_stats/16b_1000/deepseek_ai_deepseek_moe_16b_chat_raw_routing_data.pt"