# To be filled by the author(s) at the time of submission
# -------------------------------------------------------

# Title of the article:
#  - For a successful replication, it should be prefixed with "[Re]"
#  - For a failed replication, it should be prefixed with "[¬Re]"
#  - For other article types, no instruction (but please, not too long)
title: "[Re] If you like Shapley, then you'll love the core"

# List of authors with name, orcid number, email and affiliation
# Affiliation "*" means contact author
authors:
  - name: Anes Benmerzoug
    orcid: 0000-0002-0270-8446
    email: a.benmerzoug@appliedai.de
    affiliations: 1
    
  - name: Miguel de Benito Delgado
    orcid: 0000-0002-3045-3786
    email: m.debenito@appliedai-institute.de
    affiliations: 2,*

# List of affiliations with code (corresponding to author affiliations), name
# and address. You can also use these affiliations to add text such as "Equal
# contributions" as name (with no address).
affiliations:
  - code:    1
    name:    appliedAI Initiative GmbH
    address: Munich, Germany
    
  - code:    2
    name:    appliedAI Institute gGmbH
    address: Munich, Germany

# List of keywords (adding the programming language might be a good idea)
keywords: rescience c, rescience x, python, data valuation, data-centric ml, machine learning

# Code URL and DOI (url is mandatory for replication, doi after acceptance)
# You can get a DOI for your code from Zenodo,
#   see https://guides.github.com/activities/citable-code/
code:
  - url: https://github.com/aai-institute/mlrc22-like-shapley-love-the-core
  - swh: https://archive.softwareheritage.org/swh:1:dir:294da04ace110a1e2944203314f968a0bbf3c0a1;origin=https://github.com/aai-institute/mlrc22-like-shapley-love-the-core;visit=swh:1:snp:b083dc6b5411472d28f612dcdb71443054e2cb13;anchor=swh:1:rev:dc393eb64a8ec4a793f3bff58ae0db598fb29769
  - doi: 

# Data URL and DOI (optional if no data)
data:
  - url:
  - doi:

# Information about the original article that has been replicated
replication:
 - cite: >
     Yan, Tom, and Ariel D. Procaccia. “If You Like Shapley Then You’ll Love the Core.”
     In Proceedings of the 35th AAAI Conference on Artificial Intelligence, 2021, 6:5751–59.
     Virtual conference: Association for the Advancement of Artificial Intelligence, 2021.
 - bib:  yan_if_2021
 - url:  https://ojs.aaai.org/index.php/AAAI/article/view/16721/16528
 - doi:  https://doi.org/10.1609/aaai.v35i6.16721

# Abstract: surround it with double quotes, or use a folded block scalar (>)
abstract: >
  We investigate the results of [1] in the field of data valuation. We repeat their experiments and conclude that the (Monte Carlo) Least Core is sensitive to important characteristics of the ML problem of interest, making it difficult to apply.
  Scope of Reproducibility — We test all experimental claims about Monte Carlo approximations to the Least Core and their application to standard data valuation tasks.
  Methodology — We use the open source library pyDVL for all valuation algorithms. We document all details on dataset choice and generation in this paper, and release all code as open source.
  Results — We were able to reproduce the results on Least Core approximation. For the task of low‐value point identification we observed an inverted performance gap between least core and Shapley values. For high‐value identification, the least core slightly outperformed Shapley values. In two experiments, we must depart from the original paper and arrive at different conclusions.
  What was easy — Open source libraries like DVC and ray enabled efficiently designing and running the experiments.
  What was difficult — Data generation was difficult for dog‐vs‐fish because no code was available. Computing the Monte Carlo Least Core was very sensitive to the choice of utility function. Reproducing some experiments was difficult due to lack of details.
  Communication with original authors — We asked the authors for details on the experimental setup and they kindly and promptly sent us the code used for the paper. This was very useful in understanding all steps taken and in uncovering some weaknesses in the experiments.

# Bibliography file (yours)
bibliography: bibliography.bib
  
# Type of the article
# Type can be:
#  * Editorial
#  * Letter
#  * Replication
type: Replication

# Scientific domain of the article (e.g. Computational Neuroscience)
#  (one domain only & try not to be overly specific)
domain: ML Reproducibility Challenge 2022

# Coding language (main one only if several)
language: python

  
# To be filled by the author(s) after acceptance
# -----------------------------------------------------------------------------

# For example, the URL of the GitHub issue where review actually occurred
review: 
  - url: https://openreview.net/forum?id=vWzZQAahuW

contributors:
  - name:
    orcid: 
    role: editor
  - name:
    orcid:
    role: reviewer
  - name:
    orcid:
    role: reviewer

# This information will be provided by the editor
dates:
  - received:
  - accepted:
  - published: 

# This information will be provided by the editor
article:
  - number: 1
  - doi:    # DOI from Zenodo
  - url:    # Final PDF URL (Zenodo or rescience website?)

# This information will be provided by the editor
journal:
  - name:   "ReScience C"
  - issn:   2430-3658
  - volume: 9
  - issue:  2
