import argparse

from train_utils import add_flags_from_config

# Command-line configuration table.
#
# Top-level keys name a group of options; each group maps a flag name (without
# the leading "--") to a ``(default, help text)`` tuple. Every group is handed
# to ``add_flags_from_config`` to be registered on the argument parser.
#
# NOTE(review): several on/off options use 0/1 ints, some use bools, and
# 'double-precision' uses the string '0'. How each default is turned into an
# argparse type is decided by ``add_flags_from_config`` (not visible here) --
# confirm that bool defaults parse "False" as intended.
config_args = {
    'training_config': {
        'lr': (0.01, 'learning rate'),
        'curvature-lr': (1e-4, 'learning rate for curvatures'),
        'dropout': (0.0, 'dropout probability'),
        'cuda': (0, 'which cuda device to use (-1 for cpu training)'),
        'epochs': (2000, 'maximum number of epochs to train for'),
        'weight-decay': (0., 'l2 regularization strength'),
        'optimizer': ('Adam', 'which optimizer to use, can be any of [Adam, RiemannianAdam]'),
        'momentum': (0.999, 'momentum in optimizer'),
        'patience': (100, 'patience for early stopping'),
        'seed': (1234, 'seed for training'),
        'log-freq': (1, 'how often to compute and print train/val metrics (in epochs)'),
        'eval-freq': (1, 'how often to compute val metrics (in epochs)'),
        'save': (0, '1 to save model and logs and 0 otherwise'),
        'save-dir': (None, 'path to save training logs and model weights (defaults to logs/task/date/run/)'),
        # NOTE(review): no help text was ever provided for this flag.
        'sweep-c': (0, ''),
        'lr-reduce-freq': (None, 'reduce lr every lr-reduce-freq or None to keep lr constant'),
        'gamma': (0.5, 'gamma for lr scheduler'),
        # NOTE(review): no help text was ever provided for this flag.
        'print-epoch': (True, ''),
        'grad-clip': (None, 'max norm for gradient clipping, or None for no gradient clipping'),
        'min-epochs': (100, 'do not early stop before min-epochs'),
        'project': ('dummy', 'project name for wandb logging'),
        'exp-name': ('dummy', 'exp name for wandb logging'),
    },
    'model_config': {
        'task': ('nc', 'which tasks to train on, can be any of [nc, md]'),
        'model': ('FPST', 'which encoder to use, can be any of [TokenGT, FPST]'),
        'tied-weights': (False, 'use Recursive Transformer?'),
        'attn-type': ('vanilla', 'which attention to use'),
        'attn-dropout': (0.0, 'how much dropout for attention'),
        'hops': (5, 'number of hops to use for TokenGT tokenization'),
        'lap-k': (64, 'how many eigenvectors to use for TokenGT'),
        'lap-dropout': (0.0, 'how much dropout for eigenvectors'),
        'lap-sign-flip': (False, 'flip eigenvector signs?'),
        'head-dim': (32, 'embedding dimension in attn heads'),
        'layer-norm': (1, 'whether to use Layer Norm for Transformers'),
        'dim': (128, 'embedding dimension'),
        'manifold': ('Euclidean', 'which manifold to use, can be any of [Euclidean, Hyperboloid, PoincareBall, PseudoHyperboloid]'),
        'c': (None, 'hyperbolic radius, set to None for trainable curvature'),
        'r': (2., 'fermi-dirac decoder parameter for lp'),
        't': (1., 'fermi-dirac decoder parameter for lp'),
        'pretrained-embeddings': (None, 'path to pretrained embeddings (.npy file) for Shallow node classification'),
        'pos-weight': (0, 'whether to upweight positive class in node classification tasks'),
        'num-layers': (2, 'number of hidden layers in encoder'),
        'bias': (1, 'whether to use bias (1) or not (0)'),
        'act': ('ReLU', 'which activation function to use (or None for no activation)'),
        'num-heads': (4, 'number of attention heads for graph attention networks, must be a divisor of dim'),
        'alpha': (0.2, 'alpha for leakyrelu in graph attention networks'),
        # The default is the string '0', unlike the int 0/1 switches around it.
        'double-precision': ('0', 'whether to use double precision'),
        'use-att': (0, 'whether to use hyperbolic attention or not'),
        'local-agg': (0, 'whether to use local tangent space aggregation or not'),
        # NOTE(review): these two previously carried a verbatim copy of the
        # 'local-agg' help text. The wording below only expands the flag names;
        # presumably they size the space/time parts of the manifold -- confirm.
        'space-dim': (None, 'space dimension'),
        'time-dim': (1, 'time dimension'),
        'lp-decoder': ('fermi-dirac', 'which decoder to use for link prediction, can be any one of [fermi-dirac, fusion, hadamard, concat, dot, bilinear]'),
    },
    'data_config': {
        'dataset': ('cora', 'which dataset to use'),
        'val-prop': (0.05, 'proportion of validation edges for link prediction'),
        'test-prop': (0.1, 'proportion of test edges for link prediction'),
        'use-feats': (1, 'whether to use node features or not'),
        'normalize-feats': (0, 'whether to normalize input node features'),
        'normalize-adj': (1, 'whether to row-normalize the adjacency matrix'),
        'split-seed': (1000, 'seed for data splits (train/test/val)'),
    },
}

# Module-level CLI parser: register the flags of every config group on a
# single ArgumentParser. Only the per-group dicts are needed, so iterate the
# values rather than unpacking and discarding the group names.
parser = argparse.ArgumentParser()
for config_dict in config_args.values():
    # The helper's return value is rebound as the parser for the next group.
    parser = add_flags_from_config(parser, config_dict)
