
```bash
# Run latency_traces.py once per dataset (gsm8k, mmlu, math) with 10 chains and
# per-dataset threshold / warm-up / interval / consensus settings. All three
# runs use --record_dir branch10 (presumably the directory later passed as
# --trace_dir branch10 to the measurement scripts -- confirm).
# NOTE(review): if latency_traces.py declares --resample_chains with argparse
# `type=bool`, the literal string "False" is parsed as True -- confirm.

# gsm8k
python3 latency_traces.py \
  --dataset gsm8k \
  --model_id 'deepseek-ai/DeepSeek-R1-Distill-Llama-8B' \
  --prediction L14_mlp --epoch 2 --num_chains 10 \
  --threshold_val_pct 70 --warm_up 3 --patience_high 2 --interval 16 \
  --consensus_frac 0.6 --voting_frac 0.8 \
  --resample_chains False \
  --record_dir branch10

# mmlu
python3 latency_traces.py \
  --dataset mmlu \
  --model_id 'deepseek-ai/DeepSeek-R1-Distill-Llama-8B' \
  --prediction L14_mlp --epoch 2 --num_chains 10 \
  --threshold_val_pct 80 --warm_up 0 --patience_high 2 --interval 80 \
  --consensus_frac 0.4 --voting_frac 1 \
  --resample_chains False \
  --record_dir branch10

# math
python3 latency_traces.py \
  --dataset math \
  --model_id 'deepseek-ai/DeepSeek-R1-Distill-Llama-8B' \
  --prediction L14_mlp --epoch 2 --num_chains 10 \
  --threshold_val_pct 80 --warm_up 2 --patience_high 2 --interval 80 \
  --consensus_frac 0.6 --voting_frac 0.8 \
  --resample_chains False \
  --record_dir branch10

# Run latency_measurement_vllm.py in `default` mode for each dataset, talking
# to the endpoint at http://localhost:30000/v1 with --trace_dir branch10.
for dataset in gsm8k mmlu math; do
  python3 latency_measurement_vllm.py \
    --dataset "$dataset" \
    --model_id deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
    --model_url "http://localhost:30000/v1" \
    --trace_dir branch10 \
    --mode default
done

# Run latency_measurement_vllm.py in `dynasor` mode (--interval 64) for each
# dataset against the endpoint at http://localhost:30000/v1.
# NOTE(review): these runs use --trace_dir branch10_dynasor, unlike the other
# modes which use branch10; no command in this file visibly populates that
# directory -- confirm where it comes from.
for dataset in gsm8k mmlu math; do
  python3 latency_measurement_vllm.py \
    --dataset "$dataset" \
    --model_id deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
    --model_url "http://localhost:30000/v1" \
    --trace_dir branch10_dynasor \
    --mode dynasor \
    --interval 64
done

# Run latency_measurement_vllm.py in `shortm` mode for each dataset. Each entry
# below is "<dataset>:<value passed to --m>".
# NOTE(review): gsm8k and mmlu use --m 0.6 while math uses --m 3 -- confirm the
# differing value is intentional.
for spec in gsm8k:0.6 mmlu:0.6 math:3; do
  # Split "dataset:m" on the colon with parameter expansion.
  python3 latency_measurement_vllm.py \
    --dataset "${spec%%:*}" \
    --model_id deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
    --model_url "http://localhost:30000/v1" \
    --trace_dir branch10 \
    --mode shortm \
    --m "${spec##*:}"
done

# Run latency_measurement_vllm.py in `duchess` mode for each dataset, talking
# to the endpoint at http://localhost:30000/v1 with --trace_dir branch10.
for dataset in gsm8k mmlu math; do
  python3 latency_measurement_vllm.py \
    --dataset "$dataset" \
    --model_id deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
    --model_url "http://localhost:30000/v1" \
    --trace_dir branch10 \
    --mode duchess
done

# Run latency_measurement_torch.py for each dataset with --trace_dir branch10.
# Unlike the vLLM variant, no --model_url or --mode is passed here.
for dataset in gsm8k mmlu math; do
  python3 latency_measurement_torch.py \
    --dataset "$dataset" \
    --model_id deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
    --trace_dir branch10
done
```

```bash
# Serve DeepSeek-R1-Distill-Llama-8B with vLLM on port 30000, with only CUDA
# device 0 visible to the process. Port 30000 matches the
# http://localhost:30000/v1 --model_url used by the latency_measurement_vllm.py
# commands, so this server presumably has to be up before running them.
CUDA_VISIBLE_DEVICES=0 \
  vllm serve deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
  -tp 1 \
  --enable-prefix-caching \
  --port 30000
```
