import logging

from LLMProxy.option import parse_preprocess_args
from LLMProxy.data.binarizer import (
    Binarizer,
    FileBinarizer,
    LargeFileBinarizer,
)

# Configure the root logger once at import time: INFO level and a
# "timestamp - level - message" record layout.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)


def main(args):
    """Build a ``Binarizer`` from ``args`` and run the selected entry point.

    The ``Binarizer`` is constructed from ``args.tokenizer``,
    ``args.append_eos``, ``args.numberized`` and ``args.auth``.  It is then
    passed, together with the full ``args`` object, to
    ``LargeFileBinarizer.process_large_file`` when ``args.chunk_load`` is
    truthy, or to ``FileBinarizer.multiprocess_files`` otherwise.  Finally
    ``summary()`` is called on the object that entry point returned.

    Args:
        args: Options object exposing at least the attributes named above;
            it is forwarded unchanged to the selected entry point.
    """
    configured_binarizer = Binarizer(
        tokenizer=args.tokenizer,
        append_eos=args.append_eos,
        already_numberized=args.numberized,
        token=args.auth,
    )

    # Choose the processing entry point first; both are invoked with the
    # same keyword arguments, so a single call site suffices.
    if args.chunk_load:
        process = LargeFileBinarizer.process_large_file
    else:
        process = FileBinarizer.multiprocess_files

    outcome = process(binarizer=configured_binarizer, args=args)
    outcome.summary()


if __name__ == '__main__':
    # Script entry point: obtain the options object from
    # parse_preprocess_args() and hand it to main(). Nothing here runs when
    # the module is imported rather than executed.
    args = parse_preprocess_args()
    main(args)
