---
# Evaluation run configuration: `ruler` dataset with the `adakv_keydiff` press.
# NOTE(review): the section groupings and per-key notes below are inferred
# from key names only; confirm against the script that consumes this file.

# --- Data selection ---
dataset: 'ruler'
data_dir: '4096'  # quoted on purpose: stays a string, not the integer 4096
fraction: 1.0
needle_depth: null

# --- Model ---
model: 'meta-llama/Meta-Llama-3.1-8B-Instruct'
device: null
model_kwargs:
  attn_implementation: 'flash_attention_2'
  dtype: 'auto'
  device_map: 'auto'

# --- Press (compression) settings ---
press_name: 'adakv_keydiff'
# Same value (0.2) as the compression_ratio argument in press_init_command.
# NOTE(review): presumably the two must be kept in sync - confirm.
compression_ratio: 0.2
key_channel_compression_ratio: null
compression_interval: null
target_size: null
hidden_states_buffer_size: null
query_aware: false
# Quoted because the value contains parentheses, commas and '=' characters.
press_init_command: 'AdaKVPress(press=KeyDiffPress(compression_ratio=0.2), alpha_safeguard=0.2)'

# --- Generation limits ---
max_new_tokens: null
max_context_length: 65536

# --- Run bookkeeping ---
output_dir: './icml_results/ruler'
log_level: 'INFO'
seed: 42
