

# Package metadata read by setuptools (declarative setup.cfg configuration).
[metadata]
name = cramming
version = 0.1.0
author = Jonas Geiping
author_email = jonas.geiping@gmail.com
url = https://github.com/JonasGeiping/cramming
description = Cramming the training of a (BERT-type) language model into limited compute.
# Both files are read and joined, in this order, to form the long description.
long_description = file: README.md, LICENSE.md
long_description_content_type = text/markdown
license = MIT
# license_files is the current spelling; the singular license_file is deprecated.
license_files = LICENSE.md
platform = any
keywords = Machine Learning, Language Modeling
# Every entry has to be a registered trove classifier, otherwise PyPI rejects
# the upload. "Topic :: Language Modeling" is not registered, so the closest
# registered topic is used instead.
classifiers =
    Topic :: Scientific/Engineering :: Artificial Intelligence
    License :: OSI Approved :: MIT License
    Operating System :: OS Independent
    Programming Language :: Python
# Additional links belong in project_urls; bare homepage/repository/documentation
# keys are not setup.cfg metadata fields and are silently dropped by setuptools.
# Values are unquoted because configparser keeps quote characters literally.
project_urls =
    Homepage = https://github.com/JonasGeiping/cramming
    Repository = https://github.com/JonasGeiping/cramming
    Documentation = https://arxiv.org/abs/2212.14034

# Build and install options read by setuptools.
[options]
zip_safe = False
include_package_data = True
python_requires = >= 3.9
packages = find:

setup_requires =
    setuptools

# Runtime dependencies, one requirement per line. Notes live on their own
# comment lines: configparser does not strip trailing comments, so a note
# placed after a requirement would become part of the requirement string.
install_requires =
    torch >= 2.0.0
    hydra-core >= 1.1
    datasets
    tokenizers
    transformers
    evaluate
    scipy
    # scikit-learn is needed for metrics
    scikit-learn
    pynvml
    psutil
    einops
    safetensors
    # apache-beam is intentionally left out: only used for wikipedia, terrible dependencies
    # zstandard is only used for the Pile
    zstandard

# Top-level scripts installed alongside the package.
scripts =
    pretrain.py
    eval.py

# Non-Python files to ship inside every package ("*" applies to all packages).
# The glob patterns must be unquoted: configparser keeps quote characters as
# part of the value, so a quoted pattern would never match a real file name.
[options.package_data]
* = *.yaml, *.txt


# Files that check-manifest should not report as missing from the sdist.
# Entries are glob patterns, so the leading "*" is needed to match by extension;
# a bare ".ipynb" would only match a file literally named ".ipynb".
# NOTE(review): assumes the intent is to ignore all notebooks and shell scripts.
[check-manifest]
ignore =
    *.ipynb
    *.sh


# Essentially the PyTorch flake8 configuration, taken from
# https://github.com/pytorch/pytorch/blob/master/.flake8
[flake8]
select = B,C,E,F,P,T4,W,B9
max-line-length = 140
# C408 is ignored because the dict(keyword=...) call syntax is preferred here.
# E501 is not flexible enough; B950 is used for line length instead.
ignore =
    C408,
    E203,E302,E303,E305,E402,E501,E721,E741,
    F405,F821,F841,F999,
    W291,W503,W504
# One "path: codes" mapping per line.
# NOTE(review): the torch/utils entry looks inherited from the PyTorch config
# and probably matches nothing in this repository - confirm before removing.
per-file-ignores =
    __init__.py: F401
    torch/utils/cpp_extension.py: B950
# NOTE(review): presumably an option of the flake8-coding plugin - confirm.
optional-ascii-coding = True
exclude =
    .git,
    __pycache__,
    scripts,
    tables,
    outputs,
    *.pyi
