LICENSE
README.md
pyproject.toml
setup.py
fla/__init__.py
fla/utils.py
fla/layers/__init__.py
fla/layers/abc.py
fla/layers/attn.py
fla/layers/based.py
fla/layers/bitattn.py
fla/layers/delta_net.py
fla/layers/forgetting_attn.py
fla/layers/gated_deltanet.py
fla/layers/gated_deltaproduct.py
fla/layers/gla.py
fla/layers/gsa.py
fla/layers/hgrn.py
fla/layers/hgrn2.py
fla/layers/lightnet.py
fla/layers/linear_attn.py
fla/layers/multiscale_retention.py
fla/layers/nsa.py
fla/layers/rebased.py
fla/layers/rwkv6.py
fla/layers/rwkv7.py
fla/layers/simple_gla.py
fla/models/__init__.py
fla/models/utils.py
fla/models/abc/__init__.py
fla/models/abc/configuration_abc.py
fla/models/abc/modeling_abc.py
fla/models/bitnet/__init__.py
fla/models/bitnet/configuration_bitnet.py
fla/models/bitnet/modeling_bitnet.py
fla/models/delta_net/__init__.py
fla/models/delta_net/configuration_delta_net.py
fla/models/delta_net/modeling_delta_net.py
fla/models/forgetting_transformer/__init__.py
fla/models/forgetting_transformer/configuration_forgetting_transformer.py
fla/models/forgetting_transformer/modeling_forgetting_transformer.py
fla/models/gated_deltanet/__init__.py
fla/models/gated_deltanet/configuration_gated_deltanet.py
fla/models/gated_deltanet/modeling_gated_deltanet.py
fla/models/gated_deltaproduct/__init__.py
fla/models/gated_deltaproduct/configuration_gated_deltaproduct.py
fla/models/gated_deltaproduct/modeling_gated_deltaproduct.py
fla/models/gla/__init__.py
fla/models/gla/configuration_gla.py
fla/models/gla/modeling_gla.py
fla/models/gsa/__init__.py
fla/models/gsa/configuration_gsa.py
fla/models/gsa/modeling_gsa.py
fla/models/hgrn/__init__.py
fla/models/hgrn/configuration_hgrn.py
fla/models/hgrn/modeling_hgrn.py
fla/models/hgrn2/__init__.py
fla/models/hgrn2/configuration_hgrn2.py
fla/models/hgrn2/modeling_hgrn2.py
fla/models/lightnet/__init__.py
fla/models/lightnet/configuration_lightnet.py
fla/models/lightnet/modeling_lightnet.py
fla/models/linear_attn/__init__.py
fla/models/linear_attn/configuration_linear_attn.py
fla/models/linear_attn/modeling_linear_attn.py
fla/models/mamba/__init__.py
fla/models/mamba/configuration_mamba.py
fla/models/mamba/modeling_mamba.py
fla/models/mamba2/__init__.py
fla/models/mamba2/configuration_mamba2.py
fla/models/mamba2/modeling_mamba2.py
fla/models/nsa/__init__.py
fla/models/nsa/configuration_nsa.py
fla/models/nsa/modeling_nsa.py
fla/models/retnet/__init__.py
fla/models/retnet/configuration_retnet.py
fla/models/retnet/modeling_retnet.py
fla/models/rwkv6/__init__.py
fla/models/rwkv6/configuration_rwkv6.py
fla/models/rwkv6/modeling_rwkv6.py
fla/models/rwkv7/__init__.py
fla/models/rwkv7/configuration_rwkv7.py
fla/models/rwkv7/modeling_rwkv7.py
fla/models/samba/__init__.py
fla/models/samba/configuration_samba.py
fla/models/samba/modeling_samba.py
fla/models/transformer/__init__.py
fla/models/transformer/configuration_transformer.py
fla/models/transformer/modeling_transformer.py
fla/models/transformer_mtp/__init__.py
fla/models/transformer_mtp/configuration_transformer.py
fla/models/transformer_mtp/modeling_transformer.py
fla/models/transformer_top/__init__.py
fla/models/transformer_top/configuration_transformer.py
fla/models/transformer_top/modeling_transformer.py
fla/modules/__init__.py
fla/modules/activations.py
fla/modules/convolution.py
fla/modules/feature_map.py
fla/modules/fused_bitlinear.py
fla/modules/fused_cross_entropy.py
fla/modules/fused_kl_div.py
fla/modules/fused_linear_cross_entropy.py
fla/modules/fused_linear_listnet_loss.py
fla/modules/fused_norm_gate.py
fla/modules/grpo.py
fla/modules/l2norm.py
fla/modules/layernorm.py
fla/modules/layernorm_gated.py
fla/modules/mlp.py
fla/modules/parallel.py
fla/modules/rotary.py
fla/modules/seq_to_top.py
fla/ops/__init__.py
fla/ops/abc/__init__.py
fla/ops/abc/chunk.py
fla/ops/abc/naive.py
fla/ops/attn/__init__.py
fla/ops/attn/parallel.py
fla/ops/based/__init__.py
fla/ops/based/fused_chunk.py
fla/ops/based/naive.py
fla/ops/based/parallel.py
fla/ops/common/__init__.py
fla/ops/common/chunk_delta_h.py
fla/ops/common/chunk_h.py
fla/ops/common/chunk_h_parallel.py
fla/ops/common/chunk_h_split.py
fla/ops/common/chunk_o.py
fla/ops/common/chunk_scaled_dot_kkt.py
fla/ops/common/fused_recurrent.py
fla/ops/common/utils.py
fla/ops/delta_rule/__init__.py
fla/ops/delta_rule/chunk.py
fla/ops/delta_rule/fused_chunk.py
fla/ops/delta_rule/fused_recurrent.py
fla/ops/delta_rule/naive.py
fla/ops/delta_rule/parallel.py
fla/ops/delta_rule/wy_fast.py
fla/ops/forgetting_attn/__init__.py
fla/ops/forgetting_attn/parallel.py
fla/ops/gated_delta_rule/__init__.py
fla/ops/gated_delta_rule/chunk.py
fla/ops/gated_delta_rule/fused_recurrent.py
fla/ops/gated_delta_rule/wy_fast.py
fla/ops/generalized_delta_rule/__init__.py
fla/ops/generalized_delta_rule/dplr/__init__.py
fla/ops/generalized_delta_rule/dplr/chunk.py
fla/ops/generalized_delta_rule/dplr/chunk_A_bwd.py
fla/ops/generalized_delta_rule/dplr/chunk_A_fwd.py
fla/ops/generalized_delta_rule/dplr/chunk_h_bwd.py
fla/ops/generalized_delta_rule/dplr/chunk_h_fwd.py
fla/ops/generalized_delta_rule/dplr/chunk_o_bwd.py
fla/ops/generalized_delta_rule/dplr/chunk_o_fwd.py
fla/ops/generalized_delta_rule/dplr/fused_recurrent.py
fla/ops/generalized_delta_rule/dplr/naive.py
fla/ops/generalized_delta_rule/dplr/wy_fast_bwd.py
fla/ops/generalized_delta_rule/dplr/wy_fast_fwd.py
fla/ops/generalized_delta_rule/iplr/__init__.py
fla/ops/generalized_delta_rule/iplr/chunk.py
fla/ops/generalized_delta_rule/iplr/fused_recurrent.py
fla/ops/generalized_delta_rule/iplr/naive.py
fla/ops/generalized_delta_rule/iplr/wy_fast.py
fla/ops/gla/__init__.py
fla/ops/gla/chunk.py
fla/ops/gla/fused_chunk.py
fla/ops/gla/fused_recurrent.py
fla/ops/gla/naive.py
fla/ops/gsa/__init__.py
fla/ops/gsa/chunk.py
fla/ops/gsa/fused_recurrent.py
fla/ops/gsa/naive.py
fla/ops/hgrn/__init__.py
fla/ops/hgrn/chunk.py
fla/ops/hgrn/fused_recurrent.py
fla/ops/hgrn/naive.py
fla/ops/lightning_attn/__init__.py
fla/ops/lightning_attn/chunk.py
fla/ops/lightning_attn/fused_recurrent.py
fla/ops/linear_attn/__init__.py
fla/ops/linear_attn/chunk.py
fla/ops/linear_attn/fused_chunk.py
fla/ops/linear_attn/fused_recurrent.py
fla/ops/linear_attn/naive.py
fla/ops/linear_attn/utils.py
fla/ops/nsa/__init__.py
fla/ops/nsa/naive.py
fla/ops/nsa/parallel.py
fla/ops/nsa/utils.py
fla/ops/rebased/__init__.py
fla/ops/rebased/naive.py
fla/ops/rebased/parallel.py
fla/ops/retention/__init__.py
fla/ops/retention/chunk.py
fla/ops/retention/fused_chunk.py
fla/ops/retention/fused_recurrent.py
fla/ops/retention/naive.py
fla/ops/retention/parallel.py
fla/ops/rwkv4/__init__.py
fla/ops/rwkv4/fused_recurrent.py
fla/ops/rwkv6/__init__.py
fla/ops/rwkv6/chunk.py
fla/ops/rwkv6/chunk_naive.py
fla/ops/rwkv6/fused_recurrent.py
fla/ops/rwkv6/recurrent_naive.py
fla/ops/rwkv7/__init__.py
fla/ops/rwkv7/channel_mixing.py
fla/ops/rwkv7/chunk.py
fla/ops/rwkv7/fused_addcmul.py
fla/ops/rwkv7/fused_recurrent.py
fla/ops/simple_gla/__init__.py
fla/ops/simple_gla/chunk.py
fla/ops/simple_gla/fused_recurrent.py
fla/ops/simple_gla/naive.py
fla/ops/simple_gla/parallel.py
fla/ops/titans/__init__.py
fla/ops/titans/log_impl.py
fla/ops/titans/naive.py
fla/ops/ttt/__init__.py
fla/ops/ttt/chunk.py
fla/ops/ttt/fused_chunk.py
fla/ops/ttt/naive.py
fla/ops/utils/__init__.py
fla/ops/utils/asm.py
fla/ops/utils/cumsum.py
fla/ops/utils/logcumsumexp.py
fla/ops/utils/logsumexp.py
fla/ops/utils/matmul.py
fla/ops/utils/op.py
fla/ops/utils/pooling.py
fla/ops/utils/softmax.py
fla/ops/utils/solve_tril.py
fla/ops/utils/testing.py
flame/__init__.py
flame/config_manager.py
flame/data.py
flame/train.py
flame.egg-info/PKG-INFO
flame.egg-info/SOURCES.txt
flame.egg-info/dependency_links.txt
flame.egg-info/requires.txt
flame.egg-info/top_level.txt
flame/components/__init__.py
flame/components/checkpoint.py
flame/models/__init__.py
flame/models/activation_offloading.py
flame/models/parallelize_fla.py
flame/models/pipeline_fla.py
flame/tools/__init__.py
flame/tools/utils.py
flame/utils/__init__.py
flame/utils/checkpoint.py
flame/utils/convert_dcp_to_hf.py
flame/utils/convert_hf_to_dcp.py
flame/utils/hf_utils.py
torchtitan/__init__.py
torchtitan/config_manager.py
torchtitan/train.py
torchtitan/distributed/__init__.py
torchtitan/distributed/parallel_dims.py
torchtitan/distributed/pipeline.py
torchtitan/distributed/utils.py
torchtitan/experiments/__init__.py
torchtitan/experiments/flux/__init__.py
torchtitan/experiments/flux/flux_argparser.py
torchtitan/experiments/flux/loss.py
torchtitan/experiments/flux/parallelize_flux.py
torchtitan/experiments/flux/train.py
torchtitan/experiments/flux/utils.py
torchtitan/experiments/llama4/__init__.py
torchtitan/experiments/multimodal/__init__.py
torchtitan/experiments/multimodal/check_padding_mm.py
torchtitan/experiments/multimodal/mm_collator.py
torchtitan/experiments/multimodal/mm_dataset.py
torchtitan/experiments/multimodal/model.py
torchtitan/experiments/multimodal/transform.py
torchtitan/experiments/multimodal/utils.py
torchtitan/experiments/multimodal/tests/__init__.py
torchtitan/experiments/multimodal/tests/test_multimodal_model.py
torchtitan/experiments/multimodal/tests/test_utils.py
torchtitan/experiments/simple_fsdp/__init__.py
torchtitan/experiments/simple_fsdp/model.py
torchtitan/experiments/simple_fsdp/parallelize_llama.py
torchtitan/experiments/simple_fsdp/simple_fsdp.py
torchtitan/experiments/simple_fsdp/tests/__init__.py
torchtitan/experiments/simple_fsdp/tests/test_numerics.py
torchtitan/models/__init__.py
torchtitan/models/attention.py
torchtitan/models/norms.py
torchtitan/models/llama3/__init__.py
torchtitan/models/llama3/model.py
torchtitan/models/llama3/parallelize_llama.py
torchtitan/models/llama3/pipeline_llama.py