import sys
import os

from gpatch.training.arguments import gpatch_extra_args


def _parse_bool_flag(value):
    """Convert a command-line string into a real boolean.

    ``argparse`` hands the raw option string to the ``type`` callable, and
    ``bool("False")`` is ``True`` because every non-empty string is truthy.
    This converter maps the usual spellings explicitly instead.

    Args:
        value: The raw option value (normally a ``str``; a ``bool`` is
            returned unchanged).

    Returns:
        ``True`` for true/t/yes/y/on/1 and ``False`` for false/f/no/n/off/0
        (case-insensitive, surrounding whitespace ignored). An empty string
        maps to ``False``, which is what ``bool('')`` produced before.

    Raises:
        argparse.ArgumentTypeError: If the value is not a recognised
            boolean spelling, so argparse reports a clean usage error.
    """
    # Local import: the module does not import argparse at top level.
    import argparse

    if isinstance(value, bool):
        return value

    text = str(value).strip().lower()
    if text in ('true', 't', 'yes', 'y', 'on', '1'):
        return True
    if text in ('false', 'f', 'no', 'n', 'off', '0', ''):
        return False
    raise argparse.ArgumentTypeError(
        f'expected a boolean value (e.g. true/false), got {value!r}')


def get_tasks_args(parser):
    """Register the ``dqa-ppo-actor`` command-line options on ``parser``.

    The parser is first passed through ``gpatch_extra_args`` and the parser
    it returns is the one the options below are added to.

    Args:
        parser: An argparse-style parser supporting ``add_argument_group``.

    Returns:
        The parser with the ``dqa-ppo-actor`` argument group attached.
    """
    parser = gpatch_extra_args(parser)

    group = parser.add_argument_group(title='dqa-ppo-actor')
    group.add_argument('--override-opt-param-scheduler',
                       action='store_true',
                       help='Whether to override the optimizer parameter scheduler',
                       )
    # Help text (Chinese): PPO multi-sampling keep strategy; the user must
    # make sure it is compatible with `--ppo-sampling-keep`.
    group.add_argument('--ppo-sampling-keeping-strategy',
                       type=str,
                       default='best-and-worst',
                       choices=['best-and-worst', 'test', 'all', 'pods', 'random', 'd3s'],
                       help='''
ppo 多重采样保留策略。需要自己确保能 match `--ppo-sampling-keep` 。
                       ''')
    # pods
    group.add_argument('--ppo-sampling-keeping-strategy-pods-cross-batch',
                       action='store_true',
                       help='Whether to apply pods across batches',
                       )
    group.add_argument('--ppo-sampling-dynamic',
                       type=str,
                       default='static',
                       choices=['static', 'linear'],
                       help='Dynamic pods sampling strategy',
                       )
    # Help text (Chinese): number of samples kept after repeated sampling of
    # a ppo prompt; takes effect when linear pods is used.
    group.add_argument('--ppo-sampling-keep-final',
                       type=int,
                       default=-1,
                       help='ppo prompt 重复采样后保留个数，当使用linear pods时生效')
    # gspo
    group.add_argument('--use-gspo', action='store_true', help='use gspo')

    # token-level
    group.add_argument('--ppo-token-dropout-strategy',
                       type=str,
                       default='all',
                       choices=['all', 'd2s', 'linear-decrease', 'linear-increase', 'dpp', 'random'],
                       help='Token-level dropout strategy',
                       )

    # critic
    # These two options take an explicit value (e.g. `--flag True`). They use
    # _parse_bool_flag rather than `type=bool`, because bool('False') is True
    # and would silently enable the option.
    group.add_argument('--ppo-critic-rule-reward-parse-log',
                       type=_parse_bool_flag,
                       default=False,
                       help='Whether to log the rule reward parsing',
                       )
    group.add_argument('--ppo-enable-thinking',
                       type=_parse_bool_flag,
                       default=False,
                       help='Whether to enable thinking mode (prompt and critic format reward)',
                       )

    return parser
