from dataclasses import dataclass, field

@dataclass
class DataConfig:
    """Data-side configuration values with their default settings.

    Every attribute carries a type annotation on purpose: ``@dataclass``
    only turns *annotated* class attributes into fields. Without the
    annotations the generated ``__init__`` accepts no arguments, ``repr``
    shows no values, and all instances compare equal.

    Override values per instance, either through the constructor
    (``DataConfig(n_dim=32)``) or by attribute assignment, rather than by
    mutating the class attributes.
    """

    seed_data: int = 3407
    data_type: str = "flipflop"  # fixed
    n_dim: int = 10
    n_context: int = 512
    n_vocab: int = 2

    noise_level: float = 0.1
    positive_ratio: float = 0.9
    orth_noise: bool = True
    patched: bool = True
    class_num: int = 10  # linear 10, tf 50
    weights: str = "exponential"

    # NOTE(review): presumably sample counts and training-loop sizes;
    # confirm against the code that consumes this config.
    n_train: int = 100
    n_test: int = 500
    n_epoch: int = 30
    batch_size: int = 250


@dataclass
class ModelConfig:
    """Model-side configuration values with their default settings.

    All attributes are annotated, so each one is a dataclass field that can
    be overridden through the constructor, positionally (in declaration
    order) or by keyword.
    """

    # Seed, activation and size settings.
    seed_model: int = 0
    act_linear: bool = False
    act_q: int = 3
    hidden_size: int = 1
    vocab_size: int = 2

    # Attention settings.
    # NOTE(review): dk / dv are presumably the key and value dimensions;
    # confirm against the model code.
    dk: int = 1
    dv: int = 1
    num_attention_heads: int = 1
    attn_type: str = "softmax"
    fixed_qk: bool = False
    fixed_v: bool = False
    model_cls: str = "none"
    attn_scaling_type: str = "1"
    max_position_embeddings: int = 1024
    causal_attention: bool = False
    val_init_type: str = "normal"

    # Patch setting, defaulting to 0.
    patch_size: int = 0
