# To be filled by the author(s) at the time of submission
# -------------------------------------------------------

# Title of the article:
#  - For a successful replication, it should be prefixed with "[Re]"
#  - For a failed replication, it should be prefixed with "[¬Re]"
#  - For other article types, no instruction (but please, not too long)
title: "[Re] DialSummEval - Evaluation of automatic summarization evaluation metrics"

# List of authors with name, orcid number, email and affiliation
# Affiliation "*" means contact author
authors:
  - name: Patrick Camara
    orcid: 0009-0005-7069-3337
    email: patrickacamara@gmail.com
    affiliations: 1,2

  - name: Mojca Kloos
    orcid: 0009-0006-1199-599X
    email: mojca.kloos@outlook.com
    affiliations: 1,2

  - name: Vasiliki Kyrmanidi
    orcid: 0009-0007-2366-4722
    email: vickykyrmanidi@gmail.com
    affiliations: 1,2

  - name: Agnieszka Kluska
    orcid: 0009-0004-3876-9285
    email: aga.kluska1404@gmail.com
    affiliations: 1,2

  - name: Rorick Terlou
    orcid: 0009-0005-4224-0453
    email: rorick.terlou@gmail.com
    affiliations: 1,2

  - name: Lea Krause
    orcid: 0000-0001-7187-5224
    email: l.krause@vu.nl
    affiliations: 1,* # * is for contact author

# List of affiliations with code (corresponding to author affiliations), name
# and address. You can also use these affiliations to add text such as "Equal
# contributions" as name (with no address).
affiliations:
  - code: 1
    name: Vrije Universiteit Amsterdam
    address: Amsterdam, The Netherlands

  - code: 2
    name: Equal contributions
    address: ""

# List of keywords (adding the programming language might be a good idea)
keywords: rescience c, rescience x, machine learning, summarisation, evaluation, metrics, human annotation

# Code URL and DOI (url is mandatory for replication, doi after acceptance)
# You can get a DOI for your code from Zenodo,
#   see https://guides.github.com/activities/citable-code/
code:
  - url: https://github.com/tricodex/Reproducing_DialSummEval
  - doi:
  - swh: swh:1:dir:845557b246b9705efe933ab6deade75b4496a071

# Data URL and DOI (optional if no data)
data:
  - url:
  - doi:

# Information about the original article that has been replicated
replication:
  - cite: "M. Gao and X. Wan. 'DialSummEval: Revisiting Summarization Evaluation for Dialogues.' In: Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies. 2022, pp. 5693–5709" # Full textual citation
  - bib: gao2022dialsummeval # Bibtex key (if any) in your bibliography file
  - url: https://aclanthology.org/2022.naacl-main.418/ # URL to the PDF, try to link to a non-paywall version
  - doi: 10.18653/v1/2022.naacl-main.418 # Regular digital object identifier

# Don't forget to surround abstract with double quotes
abstract: "Scope of Reproducibility — In this paper, we perform a reproduction study of the original work of Gao and Wan on the evaluation of automatic dialogue summarization metrics and models. They concluded that (1) few metrics are efficient across dimensions, (2) metrics perform differently in the dialogue summarization task than when evaluating conventional summarization, (3) models tailored for dialogue summarization capture coherence and fluency better than consistency and relevance. Methodology — Three annotators evaluated the outputs of 13 summarization models and their human reference summaries, following the guidelines of the original paper. This took on average 20 hours. A new annotation tool was developed to address the limitations of the Excel interface. An ablation study was conducted with a subset of data annotated with the original process. Finally, we implemented modified parts of the authors’ code to apply the metrics over the summaries and compare their scores with our human judgments. All experiments were run on CPU. Results — The original paper’s main claims were reproduced. While not all original authors’ arguments were replicated (e.g. ROUGE scoring higher for relevance), the correlation between metrics and human judgments showed similar tendencies as in the original paper. The annotations correlated with the original at a Pearson score of 0.6, sufficient for reproducing main claims. What was easy — The reproducibility strengths of the original paper lie primarily in its profound methodological description. The rich and detailed incorporation of tables made the comparison with our reproduced results fairly easy. What was difficult — The reimplementation of the original paper’s code was relatively complex to navigate and required a fair amount of debugging when running the metrics. Certain deficiencies in the annotation guidelines also resulted in rather time-consuming decision-making for the annotators.
Finally, the methodological description of the post-processing of the annotations was relatively unclear and the code calculating the inter-annotator agreement was missing. Communication with original authors — We contacted the paper’s first author, twice, to request the annotation guidelines, the missing code parts, and clarifications regarding the annotation post-processing. Their responses were prompt and helpful."

# Bibliography file (yours)
bibliography: bibliography.bib

# Type of the article
# Type can be:
#  * Editorial
#  * Letter
#  * Replication
type: Replication

# Scientific domain of the article (e.g. Computational Neuroscience)
#  (one domain only & try to be not overly specific)
domain: ML Reproducibility Challenge 2022

# Coding language (main one only if several)
language: Python

# To be filled by the author(s) after acceptance
# -----------------------------------------------------------------------------

# For example, the URL of the GitHub issue where review actually occurred
review:
  - url: https://openreview.net/forum?id=3jaZ5tKRyiT&noteId=AHjI9Jw6frY

contributors:
  - name:
    orcid:
    role: editor
  - name:
    orcid:
    role: reviewer
  - name:
    orcid:
    role: reviewer

# This information will be provided by the editor
dates:
  - received: February 3, 2023
  - accepted:
  - published:

# This information will be provided by the editor
article:
  - number: 1 # Article number will be automatically assigned during publication
  - doi: # DOI from Zenodo
  - url: # Final PDF URL (Zenodo or rescience website?)

# This information will be provided by the editor
journal:
  - name: "ReScience C"
  - issn: 2430-3658
  - volume: 9
  - issue: 2
