#!/bin/bash

# Launches the GRPO inference CLI (scripts/python/grpo_cli_inference.py) with
# a fixed model checkpoint and fixed decoding settings.
#
# Usage: <this script> [MODE]
#   MODE  "game" or "chat"; defaults to "chat" when omitted.
#
# Environment:
#   CUDA_VISIBLE_DEVICES  GPU selection handed to the Python process.
#                         Defaults to 6 when unset or empty, so a value
#                         already exported by the caller is respected.
#
# NOTE: the Python script path below is relative, so this launcher must be
# run from the directory that contains scripts/.

# Abort on command failure, on use of an unset variable, and on a failing
# pipeline stage.
set -euo pipefail

# Configuration
readonly MODEL_CHECKPOINT="/mnt/data/data/stlm-logic/checkpoints/meta-llama-Llama-3.2-1B-Instruct_random-80-10-10_legal_move_nl_grpo-nl-expt-final/checkpoint-400"
readonly REPRESENTATION_MODE="nl"
readonly INSTRUCT_MODEL="True"
readonly MAX_NEW_TOKENS=256
# NOTE(review): temperature 0.0 with top_k 1 presumably means greedy
# decoding -- confirm against how the Python CLI interprets these flags.
readonly TEMPERATURE=0.0
readonly TOP_K=1
readonly TOP_P=1.0

# Set mode (options: "game" or "chat").
# "chat" is freeform interaction; pass "game" as the first argument to switch.
readonly MODE="${1:-chat}"

# Keep a caller-provided GPU selection; fall back to device 6 otherwise.
export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-6}"

echo "🚀 Launching Tic-Tac-Toe AI CLI..."

python scripts/python/grpo_cli_inference.py \
  --model_checkpoint "$MODEL_CHECKPOINT" \
  --representation_mode "$REPRESENTATION_MODE" \
  --instruct_model "$INSTRUCT_MODEL" \
  --max_new_tokens "$MAX_NEW_TOKENS" \
  --temperature "$TEMPERATURE" \
  --top_k "$TOP_K" \
  --top_p "$TOP_P" \
  --mode "$MODE"
