{
    "uuid": "98f8113e-ec12-53ee-877c-ed347c655fbd",
    "question": "Which paper found that using common character encodings and ciphers, or even just convincing the model that it is not communicating in natural language, can bypass the safety guardrails of large models?",
    "answer_format": "Your answer should be the title of the paper WITHOUT ANY EXPLANATION.",
    "tags": [
        "retrieval",
        "text",
        "objective"
    ],
    "anchor_pdf": [],
    "reference_pdf": [],
    "conference": [
        "iclr2024"
    ],
    "reasoning_steps": [],
    "evaluator": {
        "eval_func": "eval_paper_relevance_with_reference_answer",
        "eval_kwargs": {
            "question": "Which paper found that using common character encodings and ciphers, or even just convincing the model that it is not communicating in natural language, can bypass the safety guardrails of large models?",
            "reference_answer": "GPT-4 IS TOO SMART TO BE SAFE: STEALTHY CHAT WITH LLMS VIA CIPHER"
        }
    },
    "state": {},
    "annotator": "litsearch_manual"
}