.bandit
.editorconfig
.flake8
.gitattributes
.gitignore
.isort.cfg
.mypy.ini
.pre-commit-config.yaml
.pylintrc
FAQS.md
LICENSE
MANIFEST.in
README.md
TODO.md
_quarto.yml
docker-compose.yaml
favicon.jpg
index.qmd
pyproject.toml
requirements-dev.txt
requirements-tests.txt
requirements.txt
requirements_env.txt
setup.py
styles.css
.github/CODE_OF_CONDUCT.md
.github/CONTRIBUTING.md
.github/FUNDING.yml
.github/PULL_REQUEST_TEMPLATE.md
.github/SECURITY.md
.github/SUPPORT.md
.github/release-drafter.yml
.github/ISSUE_TEMPLATE/bug-report.yaml
.github/ISSUE_TEMPLATE/config.yml
.github/ISSUE_TEMPLATE/docs.yml
.github/ISSUE_TEMPLATE/feature-request.yaml
.github/workflows/base.yml
.github/workflows/docs.yml
.github/workflows/lint.yml
.github/workflows/main.yml
.github/workflows/multi-gpu-e2e.yml
.github/workflows/nightlies.yml
.github/workflows/pypi.yml
.github/workflows/tests-nightly.yml
.github/workflows/tests.yml
.vscode/README.md
.vscode/launch.json
.vscode/tasks.json
cicd/Dockerfile.jinja
cicd/cicd.sh
cicd/multigpu.py
cicd/multigpu.sh
cicd/tests.py
deepspeed_configs/zero1.json
deepspeed_configs/zero1_torch_compile.json
deepspeed_configs/zero2.json
deepspeed_configs/zero3.json
deepspeed_configs/zero3_bf16.json
deepspeed_configs/zero3_bf16_cpuoffload_all.json
deepspeed_configs/zero3_bf16_cpuoffload_params.json
devtools/README.md
devtools/dev_chat_template.yml
docker/Dockerfile
docker/Dockerfile-base
docker/Dockerfile-cloud
docker/Dockerfile-cloud-no-tmux
docker/Dockerfile-tests
docs/.gitignore
docs/amd_hpc.qmd
docs/batch_vs_grad.qmd
docs/config.qmd
docs/dataset_preprocessing.qmd
docs/debugging.qmd
docs/faq.qmd
docs/fsdp_qlora.qmd
docs/input_output.qmd
docs/mac.qmd
docs/multi-node.qmd
docs/multimodal.qmd
docs/multipack.qmd
docs/nccl.qmd
docs/rlhf.qmd
docs/torchao.qmd
docs/unsloth.qmd
docs/dataset-formats/conversation.qmd
docs/dataset-formats/index.qmd
docs/dataset-formats/inst_tune.qmd
docs/dataset-formats/pretraining.qmd
docs/dataset-formats/template_free.qmd
docs/dataset-formats/tokenized.qmd
docs/images/4d-mask.png
examples/cerebras/btlm-ft.yml
examples/cerebras/qlora.yml
examples/code-llama/README.md
examples/code-llama/13b/lora.yml
examples/code-llama/13b/qlora.yml
examples/code-llama/34b/lora.yml
examples/code-llama/34b/qlora.yml
examples/code-llama/7b/lora.yml
examples/code-llama/7b/qlora.yml
examples/colab-notebooks/colab-axolotl-example.ipynb
examples/dbrx/16bit-lora.yaml
examples/dbrx/8bit-lora.yaml
examples/dbrx/README.md
examples/dbrx/fft-ds-zero3.yaml
examples/deepseek-v2/fft-fsdp-16b.yaml
examples/deepseek-v2/qlora-fsdp-2_5.yaml
examples/falcon/config-7b-lora.yml
examples/falcon/config-7b-qlora.yml
examples/falcon/config-7b.yml
examples/gemma/qlora.yml
examples/gemma2/qlora.yml
examples/gemma2/reward-model.yaml
examples/gptj/qlora.yml
examples/jamba/README.md
examples/jamba/qlora.yaml
examples/jamba/qlora_deepspeed.yaml
examples/jamba/qlora_fsdp_large.yaml
examples/jeopardy-bot/config.yml
examples/llama-2/README.md
examples/llama-2/fft_optimized.yml
examples/llama-2/gptq-lora.yml
examples/llama-2/lisa.yml
examples/llama-2/loftq.yml
examples/llama-2/lora.yml
examples/llama-2/qlora-fsdp.yml
examples/llama-2/qlora.yml
examples/llama-2/relora.yml
examples/llama-3/README.md
examples/llama-3/fft-8b-liger-fsdp.yaml
examples/llama-3/fft-8b.yaml
examples/llama-3/instruct-dpo-lora-8b.yml
examples/llama-3/instruct-lora-8b.yml
examples/llama-3/lora-1b-deduplicate-dpo.yml
examples/llama-3/lora-1b-deduplicate-sft.yml
examples/llama-3/lora-1b.yml
examples/llama-3/lora-8b.yml
examples/llama-3/qlora-1b-kto.yaml
examples/llama-3/qlora-1b.yml
examples/llama-3/qlora-fsdp-405b.yaml
examples/llama-3/qlora-fsdp-70b.yaml
examples/llama-3/qlora.yml
examples/llama-3-vision/lora-11b.yaml
examples/mamba/config.yml
examples/mistral/README.md
examples/mistral/bigstral-ds-zero3.yaml
examples/mistral/config.yml
examples/mistral/lora-mps.yml
examples/mistral/lora.yml
examples/mistral/mistral-dpo-qlora.yml
examples/mistral/mistral-qlora-fsdp.yml
examples/mistral/mistral-qlora-orpo.yml
examples/mistral/mixtral-8x22b-qlora-fsdp.yml
examples/mistral/mixtral-qlora-fsdp.yml
examples/mistral/mixtral.yml
examples/mistral/mixtral_22.yml
examples/mistral/qlora.yml
examples/mpt-7b/README.md
examples/mpt-7b/config.yml
examples/openllama-3b/README.md
examples/openllama-3b/config.yml
examples/openllama-3b/lora.yml
examples/openllama-3b/qlora.yml
examples/phi/README.md
examples/phi/lora-3.5.yaml
examples/phi/phi-ft.yml
examples/phi/phi-qlora.yml
examples/phi/phi2-ft.yml
examples/phi/phi3-ft-fsdp.yml
examples/phi/phi3-ft.yml
examples/pythia/lora.yml
examples/pythia-12b/README.md
examples/pythia-12b/config.yml
examples/qwen/README.md
examples/qwen/lora.yml
examples/qwen/qlora.yml
examples/qwen/qwen2-moe-lora.yaml
examples/qwen/qwen2-moe-qlora.yaml
examples/qwen2/dpo.yaml
examples/qwen2/qlora-fsdp.yaml
examples/redpajama/README.md
examples/redpajama/config-3b.yml
examples/replit-3b/config-lora.yml
examples/stablelm-2/README.md
examples/stablelm-2/1.6b/fft.yml
examples/stablelm-2/1.6b/lora.yml
examples/starcoder2/qlora.yml
examples/tiny-llama/README.md
examples/tiny-llama/lora-mps.yml
examples/tiny-llama/lora.yml
examples/tiny-llama/pretrain.yml
examples/tiny-llama/qlora.yml
examples/xgen-7b/xgen-7b-8k-qlora.yml
examples/yi-34B-chat/README.md
examples/yi-34B-chat/qlora.yml
image/axolotl-badge-web-legacy.png
image/axolotl-badge-web.png
image/axolotl.png
image/axolotl_logo_digital_black.svg
image/axolotl_logo_digital_white.svg
image/axolotl_symbol_digital_black.svg
image/axolotl_symbol_digital_white.svg
image/axolotl_wordmark_digital_black.svg
image/axolotl_wordmark_digital_white.svg
image/sticker_fixed.png
scripts/chat_datasets.py
scripts/cloud-entrypoint-term.sh
scripts/cloud-entrypoint.sh
scripts/cutcrossentropy_install.py
scripts/finetune.py
scripts/motd
scripts/unsloth_install.py
src/setuptools_axolotl_dynamic_dependencies.py
src/axolotl/__init__.py
src/axolotl/convert.py
src/axolotl/datasets.py
src/axolotl/evaluate.py
src/axolotl/logging_config.py
src/axolotl/prompt_tokenizers.py
src/axolotl/prompters.py
src/axolotl/train.py
src/axolotl.egg-info/PKG-INFO
src/axolotl.egg-info/SOURCES.txt
src/axolotl.egg-info/dependency_links.txt
src/axolotl.egg-info/entry_points.txt
src/axolotl.egg-info/requires.txt
src/axolotl.egg-info/top_level.txt
src/axolotl/cli/__init__.py
src/axolotl/cli/evaluate.py
src/axolotl/cli/inference.py
src/axolotl/cli/main.py
src/axolotl/cli/merge_lora.py
src/axolotl/cli/merge_sharded_fsdp_weights.py
src/axolotl/cli/preprocess.py
src/axolotl/cli/shard.py
src/axolotl/cli/train.py
src/axolotl/cli/utils.py
src/axolotl/common/__init__.py
src/axolotl/common/architectures.py
src/axolotl/common/cli.py
src/axolotl/common/const.py
src/axolotl/core/__init__.py
src/axolotl/core/trainer_builder.py
src/axolotl/core/chat/__init__.py
src/axolotl/core/chat/messages.py
src/axolotl/core/chat/format/__init__.py
src/axolotl/core/chat/format/chatml.py
src/axolotl/core/chat/format/llama3x.py
src/axolotl/core/chat/format/shared.py
src/axolotl/core/datasets/__init__.py
src/axolotl/core/datasets/chat.py
src/axolotl/core/datasets/transforms/__init__.py
src/axolotl/core/datasets/transforms/chat_builder.py
src/axolotl/core/trainers/__init__.py
src/axolotl/core/trainers/trl.py
src/axolotl/integrations/LICENSE.md
src/axolotl/integrations/__init__.py
src/axolotl/integrations/base.py
src/axolotl/integrations/config.py
src/axolotl/integrations/cut_cross_entropy/ACKNOWLEDGEMENTS.md
src/axolotl/integrations/cut_cross_entropy/LICENSE
src/axolotl/integrations/cut_cross_entropy/README.md
src/axolotl/integrations/cut_cross_entropy/__init__.py
src/axolotl/integrations/cut_cross_entropy/args.py
src/axolotl/integrations/grokfast/LICENSE
src/axolotl/integrations/grokfast/README.md
src/axolotl/integrations/grokfast/__init__.py
src/axolotl/integrations/grokfast/args.py
src/axolotl/integrations/grokfast/optimizer.py
src/axolotl/integrations/liger/LICENSE
src/axolotl/integrations/liger/__init__.py
src/axolotl/integrations/liger/args.py
src/axolotl/integrations/liger/models/deepseekv2.py
src/axolotl/integrations/liger/models/jamba.py
src/axolotl/integrations/lm_eval/README.md
src/axolotl/integrations/lm_eval/__init__.py
src/axolotl/integrations/lm_eval/args.py
src/axolotl/integrations/spectrum/LICENSE
src/axolotl/integrations/spectrum/README.md
src/axolotl/integrations/spectrum/__init__.py
src/axolotl/integrations/spectrum/args.py
src/axolotl/models/__init__.py
src/axolotl/models/mamba/__init__.py
src/axolotl/models/mamba/configuration_mamba.py
src/axolotl/models/mamba/modeling_mamba.py
src/axolotl/monkeypatch/__init__.py
src/axolotl/monkeypatch/btlm_attn_hijack_flash.py
src/axolotl/monkeypatch/llama_attn_hijack_flash.py
src/axolotl/monkeypatch/llama_attn_hijack_xformers.py
src/axolotl/monkeypatch/llama_expand_mask.py
src/axolotl/monkeypatch/llama_patch_multipack.py
src/axolotl/monkeypatch/mistral_attn_hijack_flash.py
src/axolotl/monkeypatch/multipack.py
src/axolotl/monkeypatch/relora.py
src/axolotl/monkeypatch/stablelm_attn_hijack_flash.py
src/axolotl/monkeypatch/trainer_fsdp_optim.py
src/axolotl/monkeypatch/trainer_grad_accum.py
src/axolotl/monkeypatch/unsloth_.py
src/axolotl/monkeypatch/utils.py
src/axolotl/monkeypatch/attention/__init__.py
src/axolotl/monkeypatch/attention/mllama.py
src/axolotl/monkeypatch/data/__init__.py
src/axolotl/monkeypatch/data/batch_dataset_fetcher.py
src/axolotl/monkeypatch/mixtral/__init__.py
src/axolotl/monkeypatch/xformers_/__init__.py
src/axolotl/prompt_strategies/__init__.py
src/axolotl/prompt_strategies/alpaca_chat.py
src/axolotl/prompt_strategies/alpaca_instruct.py
src/axolotl/prompt_strategies/alpaca_w_system.py
src/axolotl/prompt_strategies/base.py
src/axolotl/prompt_strategies/chat_template.py
src/axolotl/prompt_strategies/completion.py
src/axolotl/prompt_strategies/context_qa.py
src/axolotl/prompt_strategies/creative_acr.py
src/axolotl/prompt_strategies/input_output.py
src/axolotl/prompt_strategies/llama2_chat.py
src/axolotl/prompt_strategies/metharme.py
src/axolotl/prompt_strategies/orcamini.py
src/axolotl/prompt_strategies/pretrain.py
src/axolotl/prompt_strategies/pygmalion.py
src/axolotl/prompt_strategies/user_defined.py
src/axolotl/prompt_strategies/bradley_terry/README.md
src/axolotl/prompt_strategies/bradley_terry/__init__.py
src/axolotl/prompt_strategies/bradley_terry/chat_template.py
src/axolotl/prompt_strategies/bradley_terry/llama3.py
src/axolotl/prompt_strategies/dpo/__init__.py
src/axolotl/prompt_strategies/dpo/chat_template.py
src/axolotl/prompt_strategies/dpo/chatml.py
src/axolotl/prompt_strategies/dpo/llama3.py
src/axolotl/prompt_strategies/dpo/user_defined.py
src/axolotl/prompt_strategies/dpo/zephyr.py
src/axolotl/prompt_strategies/kto/__init__.py
src/axolotl/prompt_strategies/kto/chatml.py
src/axolotl/prompt_strategies/kto/llama3.py
src/axolotl/prompt_strategies/kto/user_defined.py
src/axolotl/prompt_strategies/messages/__init__.py
src/axolotl/prompt_strategies/messages/chat.py
src/axolotl/prompt_strategies/orpo/__init__.py
src/axolotl/prompt_strategies/orpo/chat_template.py
src/axolotl/utils/__init__.py
src/axolotl/utils/bench.py
src/axolotl/utils/chat_templates.py
src/axolotl/utils/comet_.py
src/axolotl/utils/dict.py
src/axolotl/utils/distributed.py
src/axolotl/utils/environment.py
src/axolotl/utils/freeze.py
src/axolotl/utils/lora_embeddings.py
src/axolotl/utils/mlflow_.py
src/axolotl/utils/model_shard_quant.py
src/axolotl/utils/models.py
src/axolotl/utils/schedulers.py
src/axolotl/utils/tokenization.py
src/axolotl/utils/trainer.py
src/axolotl/utils/wandb_.py
src/axolotl/utils/callbacks/__init__.py
src/axolotl/utils/callbacks/comet_.py
src/axolotl/utils/callbacks/lisa.py
src/axolotl/utils/callbacks/mlflow_.py
src/axolotl/utils/callbacks/perplexity.py
src/axolotl/utils/callbacks/profiler.py
src/axolotl/utils/collators/__init__.py
src/axolotl/utils/collators/batching.py
src/axolotl/utils/collators/core.py
src/axolotl/utils/collators/mamba.py
src/axolotl/utils/collators/mm_chat.py
src/axolotl/utils/config/__init__.py
src/axolotl/utils/config/models/__init__.py
src/axolotl/utils/config/models/input/__init__.py
src/axolotl/utils/config/models/input/next/__init__.py
src/axolotl/utils/config/models/input/v0_4_1/__init__.py
src/axolotl/utils/config/models/internals/__init__.py
src/axolotl/utils/data/__init__.py
src/axolotl/utils/data/pretraining.py
src/axolotl/utils/data/rl.py
src/axolotl/utils/data/sft.py
src/axolotl/utils/data/shared.py
src/axolotl/utils/data/utils.py
src/axolotl/utils/gradient_checkpointing/__init__.py
src/axolotl/utils/gradient_checkpointing/unsloth.py
src/axolotl/utils/optimizers/__init__.py
src/axolotl/utils/optimizers/adopt.py
src/axolotl/utils/samplers/__init__.py
src/axolotl/utils/samplers/multipack.py
src/axolotl/utils/samplers/utils.py
tests/conftest.py
tests/constants.py
tests/test_data.py
tests/test_datasets.py
tests/test_dict.py
tests/test_exact_deduplication.py
tests/test_expand_mask.py
tests/test_freeze.py
tests/test_normalize_config.py
tests/test_packed_batch_sampler.py
tests/test_packed_dataset.py
tests/test_packed_pretraining.py
tests/test_perplexity.py
tests/test_prompt_tokenizers.py
tests/test_prompters.py
tests/test_schedulers.py
tests/test_tokenizers.py
tests/test_validation_dataset.py
tests/cli/__init__.py
tests/cli/conftest.py
tests/cli/test_cli_base.py
tests/cli/test_cli_evaluate.py
tests/cli/test_cli_fetch.py
tests/cli/test_cli_inference.py
tests/cli/test_cli_interface.py
tests/cli/test_cli_merge_lora.py
tests/cli/test_cli_merge_sharded_fsdp_weights.py
tests/cli/test_cli_preprocess.py
tests/cli/test_cli_shard.py
tests/cli/test_cli_train.py
tests/cli/test_cli_version.py
tests/cli/test_utils.py
tests/core/test_trainer_builder.py
tests/core/chat/__init__.py
tests/core/chat/test_messages.py
tests/core/chat/format/__init__.py
tests/e2e/.gitignore
tests/e2e/__init__.py
tests/e2e/test_dpo.py
tests/e2e/test_embeddings_lr.py
tests/e2e/test_falcon.py
tests/e2e/test_imports.py
tests/e2e/test_llama.py
tests/e2e/test_llama_pretrain.py
tests/e2e/test_llama_vision.py
tests/e2e/test_load_model.py
tests/e2e/test_lora_llama.py
tests/e2e/test_mamba.py
tests/e2e/test_mistral.py
tests/e2e/test_mixtral.py
tests/e2e/test_optimizers.py
tests/e2e/test_packing_loss.py
tests/e2e/test_phi.py
tests/e2e/test_qwen.py
tests/e2e/test_relora_llama.py
tests/e2e/test_reward_model_llama.py
tests/e2e/utils.py
tests/e2e/integrations/__init__.py
tests/e2e/integrations/liger.py
tests/e2e/integrations/test_cut_cross_entropy.py
tests/e2e/multigpu/__init__.py
tests/e2e/multigpu/test_eval.py
tests/e2e/multigpu/test_llama.py
tests/e2e/multigpu/test_qwen2.py
tests/e2e/patched/__init__.py
tests/e2e/patched/test_4d_multipack_llama.py
tests/e2e/patched/test_cli_integrations.py
tests/e2e/patched/test_fa_xentropy.py
tests/e2e/patched/test_falcon_samplepack.py
tests/e2e/patched/test_fused_llama.py
tests/e2e/patched/test_llama_s2_attention.py
tests/e2e/patched/test_lora_llama_multipack.py
tests/e2e/patched/test_mistral_samplepack.py
tests/e2e/patched/test_mixtral_samplepack.py
tests/e2e/patched/test_model_patches.py
tests/e2e/patched/test_phi_multipack.py
tests/e2e/patched/test_resume.py
tests/e2e/patched/test_unsloth_integration.py
tests/e2e/patched/test_unsloth_qlora.py
tests/fixtures/conversation.json
tests/fixtures/conversation.missingturns.json
tests/fixtures/conversation.tokenized.json
tests/fixtures/conversation.tokenized_llama2chat.json
tests/fixtures/alpaca/alpaca.json
tests/integrations/__init__.py
tests/integrations/liger.py
tests/monkeypatch/test_llama_attn_hijack_flash.py
tests/patched/test_llama_trainer_ga.py
tests/patched/test_validation.py
tests/prompt_strategies/__init__.py
tests/prompt_strategies/conftest.py
tests/prompt_strategies/test_alpaca.py
tests/prompt_strategies/test_chat_template_utils.py
tests/prompt_strategies/test_chat_templates.py
tests/prompt_strategies/test_chat_templates_advanced.py
tests/prompt_strategies/test_dpo_chat_templates.py
tests/prompt_strategies/test_raw_io.py
tests/prompt_strategies/messages/__init__.py
tests/prompt_strategies/messages/test_chat.py
tests/utils/test_models.py