#!/usr/bin/env bash
set -euo pipefail

# Directory the input JSONL files are read from.
SRC='/path/to/home/lltm-h200/data_stable'
# Directory the generated files are written to; created here if missing.
DST='/path/to/home/lltm/13_msswift/datasets'
mkdir -p "${DST}"


# For each input dataset, write "$DST/<name>_nothink.jsonl": a copy in which
# the system prompt of every record is prefixed with the /no_think marker.
# The input file itself is only read, never rewritten.
for f in Pytracify_deleted.jsonl CruxEval.jsonl; do
  in="$SRC/$f"
  base="${f%.jsonl}"
  out="$DST/${base}_nothink.jsonl"

  # One-time backup next to the input. Still best-effort (the script goes on
  # if the copy fails), but a failure is now reported instead of being hidden.
  if [[ ! -e "$in.bak" ]]; then
    cp -- "$in" "$in.bak" || echo "[warn] could not back up $in" >&2
  fi

  # If jq fails part-way, drop the truncated output so that it cannot be
  # mistaken for a complete dataset, then exit with the status jq returned.
  jq -c '
    # Prefix one system-prompt value with the /no_think marker.
    #   string -> marker is prepended to the string
    #   array  -> marker is prepended to the text of the first element when
    #             that element is an object with a "text" key; otherwise a
    #             new text element is inserted at the front
    #   other  -> returned unchanged
    def prepend_no_think:
      if type=="string" then "/no_think\n"+.
      elif type=="array" then
        # Check the type first: has("text") raises an error on non-objects,
        # for example when the array holds plain strings.
        if (length>0) and (.[0]|type=="object" and has("text")) then
          .[0].text = "/no_think\n"+(.[0].text // "")
        else
          [{"type":"text","text":"/no_think\n"}] + .
        end
      else .
      end;

    # Top-level "system" field, when the record has one.
    (if has("system") then .system |= prepend_no_think else . end)
    |
    # Every entry of "messages" whose role is "system" and has a "content".
    (if has("messages") then
       .messages |= map(
         if (.role=="system") and has("content") then
           .content |= prepend_no_think
         else
           .
         end
       )
     else . end)
  ' "$in" > "$out" || {
    status=$?
    rm -f -- "$out"
    echo "[error] jq failed on $in (exit $status)" >&2
    exit "$status"
  }

  echo "[done] $out"
done


# Print how many lines of the file $2 contain the literal string $1.
# Prints 0 when nothing matches and also when the file cannot be read.
count_matching_lines() {
  local needle=$1 file=$2 hits
  # grep -c prints the count itself, including "0", and exits 1 when nothing
  # matches. Echoing another 0 on failure would therefore yield "0<newline>0",
  # so fall back to 0 only when grep printed nothing (e.g. missing file).
  hits=$(grep -cF -- "$needle" "$file") || true
  printf '%s\n' "${hits:-0}"
}

# Summary per generated file: total line count and the number of lines that
# contain the /no_think marker.
echo "---- quick stats ----"
for f in Pytracify_deleted_nothink.jsonl CruxEval_nothink.jsonl; do
  path="$DST/$f"
  # Best-effort: report 0 instead of aborting when the file cannot be read.
  total=$(wc -l < "$path") || total=0
  with_no_think=$(count_matching_lines '/no_think' "$path")
  echo "$f : total_lines=$total, lines_with_/no_think=$with_no_think"
done


# Show the first two lines of each generated file as a quick visual check.
# A failing head (e.g. missing file) is tolerated so the script still ends
# normally.
for preview in Pytracify_deleted_nothink.jsonl CruxEval_nothink.jsonl; do
  echo "---- head -n2 $DST/$preview ----"
  head -n2 "$DST/$preview" || true
done
