#!/usr/bin/env bash

set -euo pipefail

# Simple dataset fetcher for LLMBar and MD-Eval
# Usage examples:
#   ./download_datasets.sh --llmbar-url https://example.com/llmbar.tar.gz
#   ./download_datasets.sh --mdeval-url https://example.com/md_eval.zip
#   ./download_datasets.sh --llmbar-file /path/to/llmbar.tar.gz --mdeval-file /path/to/md_eval.zip

# Absolute directory containing this script, and its parent directory.
SCRIPT_DIR="$(
  cd "$(dirname "${BASH_SOURCE[0]}")" && pwd
)"
ROOT_DIR="$(
  cd "${SCRIPT_DIR}/.." && pwd
)"

# Extraction destinations: one sub-directory of $ROOT_DIR/datasets per dataset.
TARGET_LLMBAR_DIR="${ROOT_DIR}/datasets/llmbar"
TARGET_MDEVAL_DIR="${ROOT_DIR}/datasets/md_eval"

# Archive sources. All start empty; the option parser fills in whichever ones
# were given on the command line.
LLMBAR_URL='' LLMBAR_FILE=''
MDEVAL_URL='' MDEVAL_FILE=''

# Print the command-line synopsis and the two extraction target directories
# to stdout, one line each.
usage() {
  printf '%s\n' \
    "Usage: $0 [--llmbar-url URL | --llmbar-file FILE] [--mdeval-url URL | --mdeval-file FILE]" \
    "Downloads/extracts datasets into:" \
    "  LLMBar -> $TARGET_LLMBAR_DIR" \
    "  MD-Eval -> $TARGET_MDEVAL_DIR"
}

# Parse command-line options into LLMBAR_URL, MDEVAL_URL, LLMBAR_FILE and
# MDEVAL_FILE.
#
# Arguments: the script's command-line arguments ("$@").
# Exits:     0 after printing usage for -h/--help;
#            1 (diagnostic + usage on stderr) for an unknown option or for a
#            value-taking option that is the last argument.
parse_args() {
  local opt
  while [[ $# -gt 0 ]]; do
    opt="$1"
    case "$opt" in
      --llmbar-url|--mdeval-url|--llmbar-file|--mdeval-file)
        # Check for the value explicitly: under `set -u` a missing "$2" would
        # otherwise abort with a bare "unbound variable" message.
        if [[ $# -lt 2 ]]; then
          echo "Missing value for $opt" >&2
          usage >&2
          exit 1
        fi
        case "$opt" in
          --llmbar-url)  LLMBAR_URL="$2" ;;
          --mdeval-url)  MDEVAL_URL="$2" ;;
          --llmbar-file) LLMBAR_FILE="$2" ;;
          --mdeval-file) MDEVAL_FILE="$2" ;;
        esac
        shift 2
        ;;
      -h|--help)
        usage
        exit 0
        ;;
      *)
        # Diagnostics go to stderr so stdout stays clean for callers.
        echo "Unknown arg: $opt" >&2
        usage >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"

# Create both dataset target directories up front. This runs unconditionally,
# so the directories exist even for a dataset that is later skipped; -p makes
# the call a no-op for directories that already exist.
mkdir -p "$TARGET_LLMBAR_DIR" "$TARGET_MDEVAL_DIR"

# Fetch a URL into a local file, using curl when available and wget otherwise.
#
# Arguments: $1 - URL to fetch
#            $2 - path of the file to write
# Outputs:   a "Downloading: ..." progress line on stdout
# Returns:   the exit status of the fetch tool; exits 1 if neither curl nor
#            wget can be found.
download() {
  local src="$1"
  local dest="$2"
  local -a fetch_cmd

  echo "Downloading: $src -> $dest"

  # Pick the tool first, then run it once at the end.
  if command -v curl >/dev/null 2>&1; then
    fetch_cmd=(curl -L --fail --retry 3 -o "$dest" "$src")
  elif command -v wget >/dev/null 2>&1; then
    fetch_cmd=(wget -O "$dest" "$src")
  else
    echo "Neither curl nor wget found" >&2
    exit 1
  fi

  "${fetch_cmd[@]}"
}

# Unpack an archive into a destination directory, choosing the unpacker from
# the archive's file-name extension.
#
# Supported: .tar.gz/.tgz, .tar.bz2/.tbz2, .tar.xz/.txz, .tar (via tar) and
# .zip (via unzip -o, which overwrites existing files without prompting).
#
# Arguments: $1 - path to the archive
#            $2 - destination directory (created if missing)
# Outputs:   an "Extracting: ..." progress line on stdout; errors on stderr
# Returns:   the unpacker's exit status; exits 1 for an unsupported extension,
#            a missing unpack tool, or a missing archive file.
extract() {
  local archive="$1" dest="$2"
  local -a unpack_cmd

  # Resolve the command before touching the filesystem, so an unsupported
  # archive is rejected without creating the destination directory.
  case "$archive" in
    *.tar.gz|*.tgz)
      unpack_cmd=(tar -xzf "$archive" -C "$dest") ;;
    *.tar.bz2|*.tbz2)
      unpack_cmd=(tar -xjf "$archive" -C "$dest") ;;
    *.tar.xz|*.txz)
      unpack_cmd=(tar -xJf "$archive" -C "$dest") ;;
    *.tar)
      unpack_cmd=(tar -xf "$archive" -C "$dest") ;;
    *.zip)
      unpack_cmd=(unzip -o "$archive" -d "$dest") ;;
    *)
      echo "Unsupported archive format: $archive" >&2
      exit 1
      ;;
  esac

  if ! command -v "${unpack_cmd[0]}" >/dev/null 2>&1; then
    echo "Required tool not found: ${unpack_cmd[0]}" >&2
    exit 1
  fi

  if [[ ! -f "$archive" ]]; then
    echo "Archive not found: $archive" >&2
    exit 1
  fi

  echo "Extracting: $archive -> $dest"
  mkdir -p "$dest"
  "${unpack_cmd[@]}"
}

# Scratch directory for downloaded archives. Created on first use and removed
# on every exit path (including failures under `set -e`) by the EXIT trap.
WORK_DIR=""

# Remove the scratch directory, if one was created.
cleanup_work_dir() {
  if [[ -n "$WORK_DIR" ]]; then
    rm -rf -- "$WORK_DIR"
  fi
}
trap cleanup_work_dir EXIT

# Download an archive and unpack it.
#
# The download is stored under the URL's own file name (query string and
# fragment stripped) because extract() selects the unpacker from the file
# extension; a bare mktemp name has no extension and would always be rejected
# as an unsupported format.
#
# Arguments: $1 - archive URL
#            $2 - destination directory
fetch_and_extract() {
  local url="$1" dest="$2"
  local name archive

  name="${url%%[?#]*}"   # drop ?query and #fragment
  name="${name##*/}"     # keep only the last path component
  case "$name" in
    ''|.|..) name="download" ;;   # no usable file name in the URL
  esac

  if [[ -z "$WORK_DIR" ]]; then
    WORK_DIR="$(mktemp -d)"
  fi
  archive="$WORK_DIR/$name"

  download "$url" "$archive"
  extract "$archive" "$dest"
  rm -f -- "$archive"
}

# Obtain one dataset from a URL (preferred when both are given) or a local
# archive, or report that it is skipped when neither was provided.
#
# Arguments: $1 - dataset label used in the skip message
#            $2 - archive URL (may be empty)
#            $3 - local archive path (may be empty)
#            $4 - destination directory
process_dataset() {
  local label="$1" url="$2" file="$3" dest="$4"

  if [[ -n "$url" ]]; then
    fetch_and_extract "$url" "$dest"
  elif [[ -n "$file" ]]; then
    extract "$file" "$dest"
  else
    echo "[Info] Skipping $label (no URL or file provided)"
  fi
}

process_dataset "LLMBar" "$LLMBAR_URL" "$LLMBAR_FILE" "$TARGET_LLMBAR_DIR"
process_dataset "MD-Eval" "$MDEVAL_URL" "$MDEVAL_FILE" "$TARGET_MDEVAL_DIR"

echo "Done. Contents:"
echo "  LLMBar -> $TARGET_LLMBAR_DIR"
echo "  MD-Eval -> $TARGET_MDEVAL_DIR"


