import re
from dataclasses import dataclass, field, fields
from typing import List, Optional

from omegaconf import II

from fairseq import utils
from fairseq.dataclass import ChoiceEnum, FairseqDataclass
from fairseq.utils import safe_getattr, safe_hasattr

from fairseq.models.transformer.transformer_config import (
    TransformerConfig,
)

@dataclass
class ByteSubwordEmbedConfig(FairseqDataclass):
    """Options for building subword embeddings out of byte embeddings.

    Every option is a plain dataclass field; the ``help`` entry in each
    field's metadata is the user-facing description of that option.
    """

    # No help text is provided for this option in the original definition;
    # its exact meaning is not visible from this file — TODO document.
    byte_subword: int = 1
    aggre: str = field(
        default='avg',
        metadata={"help": "the aggregation type of bytes to subword, select from avg or attention"},
    )
    layernum: int = field(
        default=1,
        metadata={"help": "the number of feed forward layers for word embedding"},
    )
    # Kept as a string (not an int), exactly as originally declared.
    interdim: str = field(
        default="512",
        metadata={"help": "the dimension of feed forward layers for word embedding"},
    )
    padding_idx: int = field(
        default=258,
        metadata={"help": "the padding idx for byte embedding"},
    )
    subword_bytes_file: str = field(
        default="./subword_byte_table.pt",
        metadata={"help": "the directory of subword to byte table"},
    )
    embedding_file: str = field(
        default="./embeddings",
        metadata={"help": "the directory of byte embeddings"},
    )
    std: float = field(
        default=1.0,
        metadata={"help": "std of initialization of byte embeddings"},
    )
    # The help text here used to be a verbatim copy of the ``std`` help.
    # NOTE(review): wording follows the option name; confirm against the
    # module that consumes ``relu_dropout``.
    relu_dropout: float = field(
        default=0.5,
        metadata={"help": "dropout probability applied after the ReLU activation"},
    )
    


@dataclass
class ByteSubwordTransformerConfig(TransformerConfig):
    """Transformer configuration extended with byte-to-subword embedding options.

    Adds a nested ``bw`` group holding a ``ByteSubwordEmbedConfig``.
    """

    # Built per instance via ``default_factory``: a dataclass instance used
    # directly as a default is shared by every config object, and is rejected
    # as a mutable (unhashable) default by ``dataclasses`` on Python 3.11+.
    bw: ByteSubwordEmbedConfig = field(default_factory=ByteSubwordEmbedConfig)
