# Core package metadata (PEP 621).
[project]
name = "custom_colbert"
# The version is not written in this file; it is resolved at build time
# (see [tool.setuptools_scm]).
dynamic = ["version"]
# NOTE(review): the description talks about dataset management while the
# package is named custom_colbert — confirm this text is still accurate.
description = "This repository centralizes and organizes dataset management for large language model training."
authors = [
    { name = "Manuel Faysse", email = "manuel.faysse@illuin.tech" },
    { name = "Hugues Sibille", email = "hugues.sibille@illuin.tech" },
    { name = "Tony Wu", email = "tony.wu@illuin.tech" },
]
readme = "README.md"
requires-python = ">=3.11.6"
classifiers = [
    "Intended Audience :: Science/Research",
    "Intended Audience :: Developers",
    "Operating System :: OS Independent",
    # Any "Private ::" classifier makes PyPI refuse the upload, which guards
    # against publishing this package by accident.
    "Private :: Do Not Upload",
    "Programming Language :: Python :: 3",
    # Trove classifiers exist per minor version only (3.11), never per patch
    # release; the exact patch floor is expressed by requires-python above.
    "Programming Language :: Python :: 3.11",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Typing :: Typed",
]
# Runtime dependencies. Every entry is pinned to one exact version (==), so
# upgrading a package means editing its pin here.
dependencies = [
    "accelerate==0.30.1",
    "anthropic==0.26.1",
    "configue==5.0.0",
    "datasets==2.19.1",
    "diskcache==5.6.3",
    "einops==0.8.0",
    "FlagEmbedding==1.2.10",
    "google-cloud-aiplatform==1.52.0",
    "litellm==1.38.8",
    "mteb==1.12.22",
    "pdf2image==1.17.0",
    "peft==0.11.1",
    "pytesseract==0.3.10",
    "python-dotenv==1.0.1",
    "rank-bm25==0.2.2",
    "scikit-learn==1.5.0",
    "sentencepiece==0.2.0",
    "sentence-transformers==2.7.0",
    "tenacity==8.3.0",
    "torch==2.3.0",
    "transformers==4.41.1",
    "typer==0.12.3",
]

# Optional extras; each key below is an extra name that can be requested at
# install time.
[project.optional-dependencies]
# Single requirement, pinned to one exact release.
dataset = [
    "unstructured[all-docs]==0.13.2",
]
# Developer tooling; only minimum versions (>=) are enforced.
dev = [
    "black>=24.4.2",
    "coverage>=7.5.2",
    "ipykernel>=6.29.4",
    "mypy>=1.10.0",
    "pytest>=8.2.1",
    "ruff>=0.4.5",
]

# Project links recorded in the package metadata.
[project.urls]
homepage = "https://github.com/ManuelFay/retriever-training"

# Build backend and the packages required to run it (PEP 517 / PEP 518).
[build-system]
requires = [
    # The [project] table used by this file (PEP 621 metadata) is only
    # understood by setuptools 61 and later, so state that floor explicitly
    # instead of accepting any installed setuptools.
    "setuptools>=61",
    "setuptools_scm[toml]",
    "wheel",
]
build-backend = "setuptools.build_meta"

# setuptools_scm supplies the version that [project] declares as dynamic.
[tool.setuptools_scm]
# Version string to fall back on when none can be derived from the repository.
fallback_version = "0.0.0-dev"

# setuptools-specific build options.
[tool.setuptools]
platforms = ["any"]
zip-safe = false

# Package discovery: include the custom_colbert package and every subpackage
# beneath it.
[tool.setuptools.packages.find]
include = [
    "custom_colbert",
    "custom_colbert.*",
]

# Static type-checking configuration.
[tool.mypy]
# Where to look, and what to skip.
mypy_path = "$MYPY_CONFIG_FILE_DIR/typings"
exclude = ["docs/"]

# Strictness.
check_untyped_defs = true
disallow_untyped_defs = true
no_implicit_optional = true

# Error reporting; "ignore-without-code" is enabled as an additional error code.
enable_error_code = ["ignore-without-code"]
show_error_codes = true

# Additional warnings.
warn_redundant_casts = true
warn_return_any = true
warn_unreachable = true
warn_unused_configs = true
warn_unused_ignores = true

# Per-module override: missing-import errors are silenced for transformers,
# torch, and all of their submodules.
[[tool.mypy.overrides]]
module = [
    "transformers",
    "transformers.*",
    "torch",
    "torch.*",
]
ignore_missing_imports = true

# Coverage measurement settings.
[tool.coverage.run]
# NOTE(review): this list is empty, so it names no files to include — confirm
# whether a package path (e.g. the custom_colbert sources) was intended here.
include = []

# Coverage reporting settings.
[tool.coverage.report]
# Source lines matching any of these patterns are left out of the report.
# Literal (single-quoted) strings are used so the entry containing double
# quotes needs no escaping.
exclude_lines = [
    'pragma: no cover',
    'raise NotImplementedError',
    'if __name__ == "__main__":',
    'if TYPE_CHECKING:',
    'def __repr__',
]

# Code formatter settings.
[tool.black]
# Same limit as the line-length configured under [tool.ruff]; keep both in sync.
line-length = 120

# Ruff settings shared by the linter and the formatter.
[tool.ruff]
# Same limit as the line-length configured under [tool.black]; keep both in sync.
line-length = 120

# Linter-only settings. Ruff deprecated top-level `select` and
# `per-file-ignores` in favour of the `lint` section; the dev extra already
# requires ruff>=0.4.5, which supports this layout.
[tool.ruff.lint]
select = ["E", "F", "W", "I", "N"]

[tool.ruff.lint.per-file-ignores]
# F401 (unused import) is ignored in package __init__ files.
'__init__.py' = ["F401"]
