# Reporting and remote-API options for this run.
# The analysis report is switched off.
analysis_report: false
# api_key holds the literal string "EMPTY" and api_url is null, so no endpoint
# is configured in this file.
# NOTE(review): "EMPTY" looks like a placeholder, not a credential - confirm.
api_key: EMPTY
api_url: null
# chat_template is left unset (null).
chat_template: null
# Per-dataset settings, keyed by dataset name. Only hellaswag is configured.
dataset_args:
  hellaswag:
    # NOTE(review): presumably resolved through the hub named in the top-level
    # dataset_hub key (modelscope) - confirm against the consumer.
    dataset_id: modelscope/hellaswag
    # Plain multi-line scalar: the three lines below are folded by the YAML
    # parser into one string joined with single spaces.
    description: HellaSwag is a benchmark for commonsense reasoning in natural language
      understanding tasks. It consists of multiple-choice questions where the model
      must select the most plausible continuation of a given context.
    # Split used for evaluation; the training split is named in train_split
    # further down.
    eval_split: validation
    # Explicit empty mapping (not null).
    extra_params: {}
    # few_shot_num is 0, i.e. no few-shot examples are requested.
    few_shot_num: 0
    few_shot_random: false
    filters: null
    # A single metric is listed.
    metric_list:
    - AverageAccuracy
    # Same value as the first entry of output_types below.
    model_adapter: multiple_choice_logits
    # Same value as the enclosing mapping key.
    name: hellaswag
    output_types:
    - multiple_choice_logits
    - generation
    pretty_name: HellaSwag
    # Quoted because an unquoted leading '{' would start a YAML flow mapping.
    # The value is the literal string {query}.
    prompt_template: '{query}'
    query_template: null
    # A single subset named "default".
    subset_list:
    - default
    system_prompt: null
    # Free-form labels attached to this dataset entry.
    tags:
    - Commonsense
    - MCQ
    - Knowledge
    train_split: train
# Dataset source and evaluation mode.
# Absolute local path; specific to the machine this file was written on.
dataset_dir: /data/models/modelscope/datasets
dataset_hub: modelscope
# Datasets selected for this run. The single entry has a matching key under
# dataset_args.
datasets:
- hellaswag
# Both debug and dry_run are off.
debug: false
dry_run: false
eval_backend: Native
# Batch size of 1.
eval_batch_size: 1
eval_config: null
# NOTE(review): "checkpoint" presumably means the model is loaded from local
# weights rather than called through an API - confirm against the consumer.
eval_type: checkpoint
# Text-generation parameters.
generation_config:
  # Sampling is disabled.
  # NOTE(review): temperature, top_k and top_p below are presumably ignored
  # while do_sample is false - confirm against the consumer.
  do_sample: false
  # Two length settings are given side by side.
  # NOTE(review): which one takes precedence is decided by the consumer -
  # TODO confirm.
  max_length: 2048
  max_new_tokens: 512
  # Parsed as floats (1.0) and an integer (50).
  temperature: 1.0
  top_k: 50
  top_p: 1.0
# Error handling, judge settings, sample limit and in-memory cache flag.
# ignore_errors is off.
ignore_errors: false
# Explicit empty mapping: no judge-model arguments are supplied.
judge_model_args: {}
judge_strategy: auto
judge_worker_num: 1
# NOTE(review): presumably caps the number of evaluated samples at 200 rather
# than running the full split - confirm; results would then be a subsample.
limit: 200
mem_cache: false
# Model selection.
# Absolute local path (with trailing slash); specific to the machine this
# file was written on.
model: /data/models/m2lorase_hellas-3B/
model_args:
  # Parsed by YAML as the plain string "torch.float16"; any conversion to an
  # actual dtype has to happen in the consumer.
  precision: torch.float16
  revision: master
# Empty string, which is distinct from null.
model_id: ''
model_task: text_generation
# Run control and output location.
outputs: null
# Integer seed.
seed: 42
stage: all
stream: false
# template_type, timeout and use_cache are all left unset (null).
template_type: null
timeout: null
use_cache: null
# Relative path, so it resolves against the process's working directory.
# The last component looks like a YYYYMMDD_HHMMSS timestamp.
work_dir: ./outputs/20250829_130639
