stages:
  
  # Data
  # ====================================================================================
  
  # Runs scripts.fetch_data once per entry of `active.datasets` and stores the
  # result under output/raw/<dataset>.
  fetch-data:
    matrix:
      dataset: ${active.datasets}
    cmd: >-
      python -m scripts.fetch_data
      --dataset-name ${item.dataset}
    params:
      # Only the OpenML id of the dataset is tracked here; other keys of the
      # dataset's configuration do not invalidate this stage.
      - datasets.${item.dataset}.openml_id
    deps:
      - scripts/fetch_data.py
      - src/re_classwise_shapley/io.py
    outs:
      # persist: the existing output is not removed before the stage re-runs.
      - output/raw/${item.dataset}:
          persist: true

  # Runs scripts.preprocess_data once per entry of `active.datasets`, reading
  # output/raw/<dataset> and writing output/preprocessed/<dataset>.
  preprocess-data:
    matrix:
      dataset: ${active.datasets}
    cmd: >-
      python -m scripts.preprocess_data
      --dataset-name ${item.dataset}
    params:
      # The dataset's whole configuration subtree is tracked, so any change to
      # it invalidates this stage.
      - datasets.${item.dataset}
    deps:
      - scripts/preprocess_data.py
      - src/re_classwise_shapley/filter.py
      - src/re_classwise_shapley/io.py
      # Output of the fetch-data stage for the same dataset.
      - output/raw/${item.dataset}
    outs:
      # persist: the existing output is not removed before the stage re-runs.
      - output/preprocessed/${item.dataset}:
          persist: true

  # Experiments
  # ====================================================================================

  # Runs scripts.sample_data for every (experiment, dataset) combination of the
  # active configuration, reading the preprocessed dataset and writing to
  # output/sampled/<experiment>/<dataset>.
  sample-data:
    matrix:
      experiment: ${active.experiments}
      dataset: ${active.datasets}
    # Folded scalar: the lines below are joined with single spaces into one
    # command line.
    cmd: >
      python -m scripts.sample_data
      --experiment-name ${item.experiment}
      --dataset-name ${item.dataset}
    params:
      # Only the sampler settings of the experiment are tracked.
      - experiments.${item.experiment}.sampler
    deps:
      - scripts/sample_data.py
      # Output of the preprocess-data stage for the same dataset.
      - output/preprocessed/${item.dataset}
    outs:
      # persist: the existing output is not removed before the stage re-runs.
      - output/sampled/${item.experiment}/${item.dataset}:
          persist: true

  # Runs scripts.calculate_threshold_characteristics for every
  # (experiment, dataset, repetition) combination of the active configuration.
  calculate-threshold-characteristics:
    matrix:
      experiment: ${active.experiments}
      dataset: ${active.datasets}
      repetition: ${active.repetitions}
    # Folded scalar: the lines below are joined with single spaces into one
    # command line.
    cmd: >
      python -m scripts.calculate_threshold_characteristics
      --experiment-name ${item.experiment}
      --dataset-name ${item.dataset}
      --repetition-id ${item.repetition}
    params:
      - settings.threshold_characteristics
    deps:
      - scripts/calculate_threshold_characteristics.py
      # The whole sampled directory of the (experiment, dataset) pair is the
      # dependency, not only the sub-path of this repetition.
      - output/sampled/${item.experiment}/${item.dataset}
    outs:
      # persist: the existing output is not removed before the stage re-runs.
      - output/threshold_characteristics/${item.experiment}/${item.dataset}/${item.repetition}:
          persist: true

  # Runs scripts.calculate_values for every
  # (experiment, dataset, model, valuation method, repetition) combination of
  # the active configuration.
  calculate-values:
    matrix:
      experiment: ${active.experiments}
      dataset: ${active.datasets}
      model: ${active.models}
      method: ${active.valuation_methods}
      repetition: ${active.repetitions}
    # Folded scalar: the lines below are joined with single spaces into one
    # command line. Keep the lines free of trailing whitespace, otherwise the
    # extra blanks end up in the command string.
    cmd: >
      python -m scripts.calculate_values
      --experiment-name ${item.experiment}
      --dataset-name ${item.dataset}
      --model-name ${item.model}
      --valuation-method-name ${item.method}
      --repetition-id ${item.repetition}
    params:
      - models.${item.model}
      - valuation_methods.${item.method}
    deps:
      - scripts/calculate_values.py
      - src/re_classwise_shapley/valuation_methods.py
      - output/sampled/${item.experiment}/${item.dataset}
    outs:
      # The output must be unique per matrix item. The matrix also fans out
      # over `method`, so declaring only the repetition directory would make
      # every method's stage claim the same output path, which DVC refuses as
      # overlapping outputs. The per-method file is exactly what the
      # evaluate-curves-* and evaluate-metrics-* stages depend on.
      # NOTE(review): if the script writes further per-method files into this
      # directory, list them here as additional outputs.
      # persist: the existing output is not removed before the stage re-runs.
      - output/values/${item.experiment}/${item.model}/${item.dataset}/${item.repetition}/valuation.${item.method}.pkl:
          persist: true

  # TODO Make more concise once https://github.com/iterative/dvc/issues/9948 is closed.
  # Runs scripts.evaluate_curves for the `point_removal` experiment, once per
  # (dataset, model, repetition, valuation method, curve) combination.
  evaluate-curves-point-removal:
    matrix:
      dataset: ${active.datasets}
      model: ${active.models}
      repetition: ${active.repetitions}
      method: ${active.valuation_methods}
      curve: ${experiments.point_removal.curves}
    # Folded scalar with stripped final newline: the lines are joined with
    # single spaces into one command line.
    cmd: >-
      python -m scripts.evaluate_curves
      --experiment-name point_removal
      --dataset-name ${item.dataset}
      --model-name ${item.model}
      --valuation-method-name ${item.method}
      --repetition-id ${item.repetition}
      --curve-name ${item.curve}
    params:
      - experiments.point_removal.curves.${item.curve}
    deps:
      - scripts/evaluate_curves.py
      - src/re_classwise_shapley/curve.py
      # Pickled valuation result of the matching method and repetition.
      - output/values/point_removal/${item.model}/${item.dataset}/${item.repetition}/valuation.${item.method}.pkl
    outs:
      # One CSV per curve; persist keeps it in place when the stage re-runs.
      - output/curves/point_removal/${item.model}/${item.dataset}/${item.repetition}/${item.method}/${item.curve}.csv:
          persist: true

  # Runs scripts.evaluate_curves for the `noise_removal` experiment, once per
  # (dataset, model, repetition, valuation method, curve) combination.
  evaluate-curves-noise-removal:
    matrix:
      dataset: ${active.datasets}
      model: ${active.models}
      repetition: ${active.repetitions}
      method: ${active.valuation_methods}
      curve: ${experiments.noise_removal.curves}
    # Folded scalar with stripped final newline: the lines are joined with
    # single spaces into one command line.
    cmd: >-
      python -m scripts.evaluate_curves
      --experiment-name noise_removal
      --dataset-name ${item.dataset}
      --model-name ${item.model}
      --valuation-method-name ${item.method}
      --repetition-id ${item.repetition}
      --curve-name ${item.curve}
    params:
      - experiments.noise_removal.curves.${item.curve}
    deps:
      - scripts/evaluate_curves.py
      - src/re_classwise_shapley/curve.py
      # Pickled valuation result of the matching method and repetition.
      - output/values/noise_removal/${item.model}/${item.dataset}/${item.repetition}/valuation.${item.method}.pkl
    outs:
      # One CSV per curve; persist keeps it in place when the stage re-runs.
      - output/curves/noise_removal/${item.model}/${item.dataset}/${item.repetition}/${item.method}/${item.curve}.csv:
          persist: true

  # TODO Make more concise once https://github.com/iterative/dvc/issues/9948 is closed.
  # Runs scripts.evaluate_metrics for the `point_removal` experiment, once per
  # (dataset, model, repetition, valuation method, metric) combination.
  evaluate-metrics-point-removal:
    matrix:
      dataset: ${active.datasets}
      model: ${active.models}
      repetition: ${active.repetitions}
      method: ${active.valuation_methods}
      metric: ${experiments.point_removal.metrics}
    # Folded scalar with stripped final newline: the lines are joined with
    # single spaces into one command line.
    cmd: >-
      python -m scripts.evaluate_metrics
      --experiment-name point_removal
      --dataset-name ${item.dataset}
      --model-name ${item.model}
      --valuation-method-name ${item.method}
      --repetition-id ${item.repetition}
      --metric-name ${item.metric}
    params:
      - experiments.point_removal.metrics.${item.metric}
    deps:
      - scripts/evaluate_metrics.py
      - src/re_classwise_shapley/metric.py
      # Pickled valuation result of the matching method and repetition.
      - output/values/point_removal/${item.model}/${item.dataset}/${item.repetition}/valuation.${item.method}.pkl
    outs:
      # persist: the existing output is not removed before the stage re-runs.
      - output/metrics/point_removal/${item.model}/${item.dataset}/${item.repetition}/${item.method}/${item.metric}:
          persist: true
  
  # Runs scripts.evaluate_metrics for the `noise_removal` experiment, once per
  # (dataset, model, repetition, valuation method, metric) combination.
  evaluate-metrics-noise-removal:
    matrix:
      dataset: ${active.datasets}
      model: ${active.models}
      repetition: ${active.repetitions}
      method: ${active.valuation_methods}
      metric: ${experiments.noise_removal.metrics}
    # Folded scalar with stripped final newline: the lines are joined with
    # single spaces into one command line.
    cmd: >-
      python -m scripts.evaluate_metrics
      --experiment-name noise_removal
      --dataset-name ${item.dataset}
      --model-name ${item.model}
      --valuation-method-name ${item.method}
      --repetition-id ${item.repetition}
      --metric-name ${item.metric}
    params:
      - experiments.noise_removal.metrics.${item.metric}
    deps:
      - scripts/evaluate_metrics.py
      - src/re_classwise_shapley/metric.py
      # Pickled valuation result of the matching method and repetition.
      - output/values/noise_removal/${item.model}/${item.dataset}/${item.repetition}/valuation.${item.method}.pkl
    outs:
      # persist: the existing output is not removed before the stage re-runs.
      - output/metrics/noise_removal/${item.model}/${item.dataset}/${item.repetition}/${item.method}/${item.metric}:
          persist: true
  
  # Runs scripts.render_plots once per (experiment, model) combination of the
  # active configuration and writes to output/plots/<experiment>/<model>.
  render-plots:
    matrix:
      experiment: ${active.experiments}
      model: ${active.models}
    # Folded scalar with stripped final newline: the lines are joined with
    # single spaces into one command line.
    cmd: >-
      python -m scripts.render_plots
      --experiment-name ${item.experiment}
      --model-name ${item.model}
    params:
      - experiments.${item.experiment}.metrics
    deps:
      - scripts/render_plots.py
      # Directories that contain the outputs of the evaluate-curves-*,
      # evaluate-metrics-* and calculate-threshold-characteristics stages.
      - output/curves/${item.experiment}/${item.model}
      - output/metrics/${item.experiment}/${item.model}
      - output/threshold_characteristics/${item.experiment}
    outs:
      # Unlike the other stages, this output is not marked `persist`.
      - output/plots/${item.experiment}/${item.model}
  
  