
















import csv
import os

import datasets

# BibTeX entry for the paper that introduced the ETHICS benchmark.
_CITATION = """\
@article{hendrycks2021ethics,
  title={Aligning AI With Shared Human Values},
  author={Dan Hendrycks and Collin Burns and Steven Basart and Andrew Critch and Jerry Li and Dawn Song and Jacob Steinhardt},
  journal={Proceedings of the International Conference on Learning Representations (ICLR)},
  year={2021}
}
"""

# General description shared by every configuration; the per-subset
# description is appended to it when the dataset info is built.
_DESCRIPTION = """\
The ETHICS dataset is a benchmark that spans concepts in justice, well-being,
duties, virtues, and commonsense morality. Models predict widespread moral
judgments about diverse text scenarios.
"""

_HOMEPAGE = "https://github.com/hendrycks/ethics"

# NOTE(review): left empty as in the original; fill in once the upstream
# license has been confirmed.
_LICENSE = ""

# Single tar archive that contains the CSV files of all subsets.
_URLS = "https://people.eecs.berkeley.edu/~hendrycks/ethics.tar"


class EthicsConfig(datasets.BuilderConfig):
    """Builder configuration describing one subset of the ETHICS benchmark."""

    def __init__(self, prefix, features, **kwargs):
        """Set up the configuration for a single subset.

        Args:
            prefix: Stored as ``self.prefix``; the builder in this file uses
                it to form the CSV file names ``<prefix>_train.csv`` and
                ``<prefix>_test.csv``.
            features: Stored as ``self.features``; the builder passes it on
                as the feature schema of the subset.
            **kwargs: Forwarded unchanged to ``datasets.BuilderConfig``
                (e.g. ``name`` and ``description``).
        """
        # Every subset is pinned to the same dataset version.
        config_version = datasets.Version("0.0.1")
        super().__init__(version=config_version, **kwargs)
        self.features = features
        self.prefix = prefix


class HendrycksEthics(datasets.GeneratorBasedBuilder):
    """Dataset builder for the ETHICS benchmark, one configuration per subset.

    The available configurations are ``commonsense``, ``deontology``,
    ``justice``, ``utilitarianism`` and ``virtue``. Each one reads
    ``ethics/<name>/<prefix>_{train,test}.csv`` from the extracted archive.
    """

    BUILDER_CONFIGS = [
        EthicsConfig(
            name="commonsense",
            prefix="cm",
            features=datasets.Features(
                {
                    "label": datasets.Value("int32"),
                    "input": datasets.Value("string"),
                    "is_short": datasets.Value("bool"),
                    "edited": datasets.Value("bool"),
                }
            ),
            description="The Commonsense subset contains examples focusing on moral standards and principles that most people intuitively accept.",
        ),
        EthicsConfig(
            name="deontology",
            prefix="deontology",
            features=datasets.Features(
                {
                    "group_id": datasets.Value("int32"),
                    "label": datasets.Value("int32"),
                    "scenario": datasets.Value("string"),
                    "excuse": datasets.Value("string"),
                }
            ),
            description="The Deontology subset contains examples focusing on whether an act is required, permitted, or forbidden according to a set of rules or constraints",
        ),
        EthicsConfig(
            name="justice",
            prefix="justice",
            features=datasets.Features(
                {
                    "group_id": datasets.Value("int32"),
                    "label": datasets.Value("int32"),
                    "scenario": datasets.Value("string"),
                }
            ),
            description="The Justice subset contains examples focusing on how a character treats another person",
        ),
        EthicsConfig(
            name="utilitarianism",
            prefix="util",
            features=datasets.Features(
                {
                    "activity": datasets.Value("string"),
                    "baseline": datasets.Value("string"),
                    # Always emitted as "" by _generate_examples.
                    "rating": datasets.Value("string"),
                }
            ),
            description="The Utilitarianism subset contains scenarios that should be ranked from most pleasant to least pleasant for the person in the scenario",
        ),
        EthicsConfig(
            name="virtue",
            prefix="virtue",
            features=datasets.Features(
                {
                    "group_id": datasets.Value("int32"),
                    "label": datasets.Value("int32"),
                    "scenario": datasets.Value("string"),
                    "trait": datasets.Value("string"),
                }
            ),
            description="The Virtue subset contains scenarios focusing on whether virtues or vices are being exemplified",
        ),
    ]

    def _info(self):
        """Return the ``DatasetInfo`` for the selected configuration.

        The description is the module-level description followed by the
        subset-specific one on a new line.
        """
        return datasets.DatasetInfo(
            description=f"{_DESCRIPTION}\n{self.config.description}",
            features=self.config.features,
            homepage=_HOMEPAGE,
            license=_LICENSE,
            citation=_CITATION,
        )

    def _split_generators(self, dl_manager):
        """Download and extract the archive and declare the two splits.

        Args:
            dl_manager: Download manager used to fetch and extract ``_URLS``.

        Returns:
            A list with the TRAIN and TEST ``SplitGenerator`` objects. Their
            ``gen_kwargs`` carry the CSV ``filepath`` and the ``split`` name
            (``"train"`` or ``"test"``).
        """
        data_dir = dl_manager.download_and_extract(_URLS)
        subset_dir = os.path.join(data_dir, "ethics", self.config.name)
        splits = (
            (datasets.Split.TRAIN, "train"),
            (datasets.Split.TEST, "test"),
        )
        return [
            datasets.SplitGenerator(
                name=split_name,
                gen_kwargs={
                    "filepath": os.path.join(
                        subset_dir, f"{self.config.prefix}_{split}.csv"
                    ),
                    "split": split,
                },
            )
            for split_name, split in splits
        ]

    @staticmethod
    def _parse_bool(value):
        """Convert a CSV boolean cell such as ``"True"``/``"False"`` to ``bool``.

        Only ``"true"`` and ``"1"`` (case-insensitive, surrounding whitespace
        ignored) are treated as true; everything else is false.
        """
        return str(value).strip().lower() in ("true", "1")

    def _generate_examples(self, filepath, split):
        """Yield ``(key, example)`` pairs read from one subset CSV file.

        Args:
            filepath: Path of the CSV file to read.
            split: Name of the split; accepted because it is passed through
                ``gen_kwargs`` but not used here.

        Yields:
            Tuples of the zero-based row index and a dict whose keys match
            the features of the selected configuration. Nothing is yielded
            for a configuration name this method does not know.

        Raises:
            ValueError: If a label is not an integer literal, or a virtue
                scenario does not contain the ``" [SEP] "`` separator.
        """
        subset = self.config.name
        # Number of consecutive rows that share one group_id, per subset.
        # NOTE(review): presumably related examples are stored contiguously
        # in the CSV files — confirm against the data.
        group_size = {"deontology": 4, "justice": 4, "virtue": 5}.get(subset)
        # The utilitarianism file is read with explicit column names, so its
        # first line is treated as data rather than as a header.
        fieldnames = ["activity", "baseline"] if subset == "utilitarianism" else None

        # Fix the encoding so decoding does not depend on the platform locale.
        with open(filepath, newline="", encoding="utf-8") as f:
            for key, row in enumerate(csv.DictReader(f, fieldnames=fieldnames)):
                if subset == "deontology":
                    yield key, {
                        "group_id": key // group_size,
                        "label": int(row["label"]),
                        "scenario": row["scenario"],
                        "excuse": row["excuse"],
                    }
                elif subset == "justice":
                    yield key, {
                        "group_id": key // group_size,
                        "label": int(row["label"]),
                        "scenario": row["scenario"],
                    }
                elif subset == "commonsense":
                    # CSV cells are strings and bool("False") is True, so the
                    # flags are parsed explicitly instead of being left to a
                    # later cast to the declared bool feature.
                    yield key, {
                        "label": int(row["label"]),
                        "input": row["input"],
                        "is_short": self._parse_bool(row["is_short"]),
                        "edited": self._parse_bool(row["edited"]),
                    }
                elif subset == "virtue":
                    # The trait follows the last separator; splitting from
                    # the right keeps a separator inside the scenario text
                    # from breaking the two-way unpacking.
                    scenario, trait = row["scenario"].rsplit(" [SEP] ", 1)
                    yield key, {
                        "group_id": key // group_size,
                        "label": int(row["label"]),
                        "scenario": scenario,
                        "trait": trait,
                    }
                elif subset == "utilitarianism":
                    yield key, {
                        "activity": row["activity"],
                        "baseline": row["baseline"],
                        "rating": "",
                    }
