#!/usr/bin/env bash

## Extracts doc-strings for every declaration.

# See the help text in `lake exe comment_data` for a description of the output format.

# Run either as `scripts/mutate.sh` to run on all of Mathlib (several hours),
# or `scripts/mutate.sh Mathlib.Logic.Hydra` to run on just one file.
# Results will go in `datasets/mutate`.

# Partition the command-line words into two arrays: every word that starts
# with `--` is collected in FLAGS, every other word in ARGS.
# Note that a bare `--` also matches the `--*` pattern and so lands in FLAGS.
declare -a FLAGS=() ARGS=()

for word; do
  case $word in
    --*) FLAGS+=("$word") ;;
    *)   ARGS+=("$word") ;;
  esac
done

# Driver.
#   * No non-flag argument: run `lake build`, then re-invoke this script once
#     per word left after stripping `import ` from Mathlib.lean, 32 jobs at a
#     time, forwarding the collected FLAGS.
#   * Otherwise: process only the first non-flag argument as a module name.
if [[ ${#ARGS[@]} -eq 0 ]]; then
  lake build
  # Word splitting of the command substitution is intentional: every
  # whitespace-separated module name becomes one input for parallel.
  # shellcheck disable=SC2046
  parallel -j32 ./scripts/mutate.sh "${FLAGS[@]}" -- ::: \
    $(sed -e 's/import //' .lake/packages/mathlib/Mathlib.lean)
else
  DIR=datasets/mutate
  mkdir -p "$DIR"
  # Only the first non-flag argument is used; any further ones are ignored.
  mod=${ARGS[0]}
  # Do nothing for this module when $DIR/$mod.json already exists.
  if [[ ! -f "$DIR/$mod.json" ]]; then
    # Build only when the mutation executable is not present yet.
    if [[ ! -f .lake/build/bin/mutation ]]; then
      lake build
    fi
    printf '%s\n' "$mod"
    # Quoted expansions keep each flag and the module name as single words.
    lake exe mutation "${FLAGS[@]}" "$mod" \
      | python scripts/data_create.py --dir "$DIR/$mod.json"
  fi
fi