# FastGen hybrid strategies
# https://arxiv.org/abs/2310.01801
cache_strategy:
  - 'hybrid'
# [Do not change] Leave this at 1.0; the compression ratio is controlled
# through min_recovery_frac instead.
max_cache_length:
  - 1.0
global_tokens: 4
# Higher means less compression. With 0.85, the policy chosen is the one that
# compresses the most tokens AND still recovers 85% of the full attention
# matrix.
min_recovery_frac: 0.85
# Candidate policies for the hybrid strategy.
hybrid_strategies:
  - strategy: 'special'
  - strategy: 'special_punc'
  - strategy: 'special_punc_heavy_hitter'
    # Fraction of important tokens to keep.
    heavy_hitter_frac: 0.3
  - strategy: 'special_punc_heavy_hitter_window'
    # Fraction of recent tokens to keep.
    recent_window: 0.3
    # Fraction of important tokens to keep.
    heavy_hitter_frac: 0.3
  - strategy: 'full'