# Copyright (c) 2023, NVIDIA CORPORATION.  All rights reserved.

[build-system]
# PEP 517 build configuration: the package is built with the setuptools backend.
# setuptools is capped below 80.0.0 (the same cap is repeated in the `dev`/`lts`
# extras and the `build` dependency group). pybind11 is installed into the build
# environment -- presumably for a compiled extension; confirm against setup.py.
requires = ["setuptools<80.0.0", "pybind11"]
build-backend = "setuptools.build_meta"

[tool.setuptools]
# Ship files declared as package data together with the Python modules.
include-package-data = true

[tool.setuptools.packages.find]
# Package discovery is restricted to `megatron.core` and its subpackages.
include = ["megatron.core", "megatron.core.*"]

[tool.setuptools.dynamic]
# These back the `dynamic = ["version", "readme"]` declaration in [project]:
# the version is read from a module attribute, the long description from README.md.
version = { attr = "megatron.core.package_info.__version__" }
readme = { file = "README.md", content-type = "text/markdown" }

[project]
name = "megatron-core"
# `version` and `readme` are resolved at build time via [tool.setuptools.dynamic].
dynamic = ["version", "readme"]
description = "Megatron Core - a library for efficient and scalable training of transformer based models"
requires-python = ">=3.10"
# NOTE(review): this says Apache 2.0 while the license classifier below says
# "BSD License". One of the two is wrong -- confirm against the LICENSE file.
license = { text = "Apache 2.0" }
dependencies = ["torch", "numpy<2.0.0", "packaging"]
authors = [{ name = "NVIDIA", email = "nemo-toolkit@nvidia.com" }]
maintainers = [{ name = "NVIDIA", email = "nemo-toolkit@nvidia.com" }]
keywords = [
    "NLP",
    "NLU",
    "deep",
    "gpu",
    "language",
    "learning",
    "machine",
    "nvidia",
    "pytorch",
    "torch",
    "transformer",
]
classifiers = [
    "Development Status :: 5 - Production/Stable",
    "Environment :: Console",
    "Intended Audience :: Developers",
    "Intended Audience :: Information Technology",
    "Intended Audience :: Science/Research",
    # NOTE(review): conflicts with `license = { text = "Apache 2.0" }` above.
    "License :: OSI Approved :: BSD License",
    "Natural Language :: English",
    "Operating System :: OS Independent",
    # Keep the per-version classifiers in step with `requires-python` above.
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Topic :: Scientific/Engineering :: Image Recognition",
    "Topic :: Scientific/Engineering :: Mathematics",
    "Topic :: Scientific/Engineering",
    "Topic :: Software Development :: Libraries :: Python Modules",
    "Topic :: Software Development :: Libraries",
    "Topic :: Utilities",
]

[project.urls]
Download = "https://github.com/NVIDIA/Megatron-LM/releases"
# GitHub serves in-repo directories under /tree/<branch>/<path>; without that
# segment the link does not resolve.
Homepage = "https://github.com/NVIDIA/Megatron-LM/tree/main/megatron/core"

[project.optional-dependencies]
# Extras installable as `megatron-core[<name>]`.
# `mlm`: presumably the extra runtime dependencies of the Megatron-LM scripts
# that live outside `megatron.core` -- confirm.
mlm = ["flask-restful", "sentencepiece", "tiktoken", "wandb"]

# `dev`: the full feature set with bounded / compatible-release version
# constraints. Declared mutually exclusive with `lts` in [tool.uv] `conflicts`.
dev = [
    "tqdm",
    "einops~=0.8",
    "tensorstore~=0.1,!=0.1.46,!=0.1.72",
    "nvtx~=0.2",
    "transformers~=4.53",
    "multi-storage-client~=0.20",
    "opentelemetry-api~=1.33.1",
    "setuptools<80.0.0",
    "mamba-ssm~=2.2",
    "causal-conv1d~=1.5",
    "nv-grouped-gemm~=1.1",
    "transformer-engine[pytorch]>=2.5.0a0,<2.7.0",
    "nvidia-resiliency-ext>=0.4.0a0,<0.5.0",
    # The environment marker skips this requirement on macOS.
    "nvidia-modelopt[torch]>=0.33.0a0,<0.34.0; sys_platform != 'darwin'",
    "megatron-energon[av_decode]~=6.0",
    "flashinfer-python",
    "onnxscript",
]

# `lts`: a smaller dependency set, mostly without version bounds (only the
# tensorstore exclusions and the setuptools cap are kept).
lts = [
    "tqdm",
    "einops",
    "tensorstore!=0.1.46,!=0.1.72",
    "nvtx",
    "transformers",
    "zarr",
    "setuptools<80.0.0",
]

[dependency-groups]
# Development-only dependency groups; they are not part of the published
# package metadata. [tool.uv] `default-groups` installs `linting`, `build` and
# `test` by default, while `ci` and `flash_mla` have to be requested explicitly.
test = [
    "coverage",
    "nltk",
    "wrapt",
    "pytest==8.3.5",
    "pytest-mock",
    "pytest-cov",
    "pytest-random-order",
    "pytest-asyncio",
    "pygithub",
]

# Tooling needed to build this package and the packages listed under
# [tool.uv] `no-build-isolation-package`.
build = [
    "setuptools<80.0.0",
    "packaging",
    "hatchling",
    "pybind11",
    "Cython>=3.0.0",
    "torch",
]
# Formatter and linter versions; black and isort are configured further down
# in [tool.black] and [tool.isort].
linting = [
    "ruff~=0.9.0",
    "black==24.4.2",
    "isort==5.13.2",
    "flake8==7.1.0",
    "pylint==3.2.6",
]
ci = ["python-gitlab", "slack-sdk", "pandas"]
# Resolved from the git revision pinned in [tool.uv.sources].
flash_mla = ["flash_mla"]

[tool.uv]
# Dependency groups that uv installs without being asked for them.
default-groups = ["linting", "build", "test"]
# These packages are built without build isolation, i.e. against the packages
# already present in the environment -- presumably because their builds need
# the preinstalled torch; confirm.
no-build-isolation-package = [
    "transformer-engine",
    "transformer-engine-torch",
    "mamba-ssm",
    "causal-conv1d",
    "nv-grouped-gemm",
    "flash_mla",
]
link-mode = "copy"
# The `lts` and `dev` extras may not be enabled together.
conflicts = [[{ extra = "lts" }, { extra = "dev" }]]
# We don't want to install torch, torchvision, and triton
# because they are already installed in the base image.
# The `sys_platform == 'never'` marker never matches, so these overrides
# effectively remove the three packages from resolution.
override-dependencies = [
    "torch; sys_platform == 'never'",
    "torchvision; sys_platform == 'never'",
    "triton; sys_platform == 'never'",
]

[tool.uv.sources]
# Both packages are taken from git and pinned to an exact commit.
flash_mla = [
    { git = "https://github.com/deepseek-ai/FlashMLA", rev = "9edee0c022cd0938148a18e334203b0aab43aa19" },
]
transformer-engine = { git = "https://github.com/NVIDIA/TransformerEngine.git", rev = "c90a720765bc01ebb5ba1f70a7ce2d16b2e46d77" } # on `release_v2.6`

[tool.isort]
profile = "black"                                                          # black-compatible
line_length = 100                                                          # should match black parameters
py_version = 310                                                           # python 3.10 as a target version (matches requires-python)
known_first_party = ["megatron"]                                           # FIRSTPARTY section
known_third_party = ["transformer_engine"]                                 # THIRDPARTY section
sections = ["FUTURE", "STDLIB", "THIRDPARTY", "FIRSTPARTY", "LOCALFOLDER"]
default_section = "THIRDPARTY"
extend_skip = ["setup.py"]

[tool.black]
# Must stay equal to `line_length` in [tool.isort].
line_length = 100
skip_string_normalization = true
# Recognized by newer black versions; prevents reformatting the code with an incompatible version.
# Matches the NeMo version so people working on both codebases don't need two different versions of black installed.
required_version = "24"
skip_magic_trailing_comma = true
include = '\.pyi?$'
exclude = '''
/(
    \.git
  | \.venv
  | build
)/
'''

[tool.pytest.ini_options]
# --durations=15: report the 15 slowest tests; -s: do not capture output;
# -rA: print a summary line for every test outcome; -x: stop at the first failure.
addopts = "--durations=15 -s -rA -x"
testpaths = ["tests"]
python_files = "test_*.py"
# Custom markers are registered here so pytest does not warn about unknown marks.
markers = [
    "internal: mark a test as a test to private/internal functions.",
    "flaky: mark flaky tests for LTS environment",
    "flaky_in_dev: mark flaky tests for DEV environment",
]

[tool.coverage.run]
# coverage.py expands `$LOCAL_RANK` from the environment, so each value of
# LOCAL_RANK writes its own data file -- presumably one per process of a
# distributed test run; confirm against the test launcher.
data_file = ".coverage_$LOCAL_RANK"
concurrency = ["thread", "multiprocessing"]
# NOTE(review): these are absolute paths with a hard-coded Python 3.12
# location; they look specific to the CI container image -- confirm.
omit = [
    "/tmp/*",
    "/workspace/tests/*",
    "/usr/local/lib/python3.12/dist-packages/*",
]
# Record file paths relative to the working directory instead of absolute ones.
relative_files = true

[tool.coverage.paths]
# Source location used when data files are combined; presumably the checkout
# path inside the CI container -- confirm.
source = ["/opt/megatron-lm/"]

[tool.ruff.lint]
# Only S506 (flake8-bandit: unsafe `yaml.load`) is enforced at the moment.
# The broader selection -- all `pydocstyle` rules, limited to those that adhere
# to the Google convention via `convention = "google"` below, plus pyflakes --
# is left commented out:
# select = ["D", "F"]
select = ["S506"]

# The exclusions below only have an effect once "D"/"F" rules are selected.
# - On top of the Google convention, disable `D417`, which requires
#   documentation for every function parameter.
# - F841: local variable assigned but never used (excluded to favor readability)
# TODO: Remove D10 once we are about to release to get all the docstrings written
ignore = ["D417", "D10", "F841"]

[tool.ruff.lint.pydocstyle]
convention = "google"

# Section to exclude errors for different file types.
# Declared under `lint.` like the other linter settings; the top-level
# `[tool.ruff.per-file-ignores]` spelling is deprecated.
[tool.ruff.lint.per-file-ignores]
# Ignore all directories named `tests`.
"tests/**" = ["D"]
# Ignore all files that end in `_test.py`.
"*_test.py" = ["D"]
# Ignore F401 (import but unused) in __init__.py
"__init__.py" = ["F401"]
