# run
task = "sweep"
# NOTE(review): presumably the random seed used for reproducibility — confirm against the consumer.
seed = 2025


# dataset
dataset = "RNAGymDataset"
# Optional extra key (currently disabled): "num_samples": 30  -- None for all samples.
dataset_cfg = {"seq_type": "dna", "path": "data/DMS_RNAGym_substitutions"}
batch_size = 1
# Number of workers for data loading.
num_workers = 4
# NOTE(review): the trailing "date" path segment looks like a placeholder — confirm before running.
output_dir = "output/rnagym/evo2-40b/date"
metrics = [["ndcg_abs"]]
# Options: "all" (average of for+rev), "for" (forward only), "rev" (reverse only)
score_modes = ["all"]


# model
# The output_dir setting in this file embeds "evo2-40b"; keep the two in sync when changing the model.
model = "EVO2_40B"
kmer = 1
evo1_score = False
# NOTE(review): presumably controls whether a BOS token is prepended to inputs — confirm against the consumer.
addbos = False


# generation
generate_mode = "forward_generation"
# List of sampling configurations; a single entry is defined here.
generate_cfg = [{"temperature": 1, "top_k": 1, "top_p": 1}]


# evaluation
# NOTE(review): the unit (steps, samples, ...) is not visible in this file — confirm against the consumer.
eval_interval = 20000


# wandb
# Keep False for anonymity during review; set to True afterwards.
use_wandb = False
# Anonymized placeholders; fill in after review if needed.
wandb_proj_name = "ANON_PROJECT"
entity = "ANONYMOUS"
