

import json


def param_groups_lrd(model, weight_decay=0.05, no_weight_decay_list=None, layer_decay=.75):
    """Group trainable parameters for layer-wise learning-rate decay.

    Every parameter with ``requires_grad`` set is placed in a group keyed by
    its layer id and by whether weight decay applies to it. The group names
    (with parameter names instead of tensors) are printed as JSON.

    Args:
        model: Object exposing ``encoder.layers`` (only its length is used)
            and ``named_parameters()``.
        weight_decay: Weight decay stored on the "decay" groups.
        no_weight_decay_list: Optional container of parameter names that are
            never decayed. ``None`` (the default) means no extra names.
        layer_decay: Base of the per-layer scale. Layer ``i`` gets
            ``layer_decay ** (num_layers - i)`` where ``num_layers`` is
            ``len(model.encoder.layers) + 1``. When it is not below 1, all
            parameters are assigned to layer 0.

    Returns:
        A list of dicts with keys ``"lr_scale"``, ``"weight_decay"`` and
        ``"params"``, in order of first appearance. ``"lr_scale"`` is only
        recorded here; presumably the training loop applies it to the
        learning rate -- confirm against the caller.
    """
    # A None default avoids sharing one mutable list object across calls.
    skip_decay_names = () if no_weight_decay_list is None else no_weight_decay_list

    num_layers = len(model.encoder.layers) + 1
    layer_scales = [layer_decay ** (num_layers - i) for i in range(num_layers + 1)]

    # Two parallel mappings: one holds names (JSON-printable), one holds the
    # parameter objects that are actually returned.
    param_group_names = {}
    param_groups = {}

    for name, param in model.named_parameters():
        if not param.requires_grad:
            continue

        # No decay: all 1D parameters and the model-specific names.
        if param.ndim == 1 or name in skip_decay_names:
            decay_tag = "no_decay"
            group_decay = 0.
        else:
            decay_tag = "decay"
            group_decay = weight_decay

        # Per the original note, many distinct param groups slow DeepSpeed
        # down, so everything collapses into layer 0 unless decay is active.
        layer_id = get_layer_id_for_vit(name, num_layers) if layer_decay < 1 else 0
        group_name = f"layer_{layer_id}_{decay_tag}"

        if group_name not in param_groups:
            scale = layer_scales[layer_id]
            param_group_names[group_name] = {
                "lr_scale": scale,
                "weight_decay": group_decay,
                "params": [],
            }
            param_groups[group_name] = {
                "lr_scale": scale,
                "weight_decay": group_decay,
                "params": [],
            }

        param_group_names[group_name]["params"].append(name)
        param_groups[group_name]["params"].append(param)

    print(f"parameter groups: \n{json.dumps(param_group_names, indent=2)}")

    return list(param_groups.values())


def get_layer_id_for_vit(name, num_layers):
    """Return the layer id used to look up a parameter's lr scale.

    Names starting with ``image_adapter`` map to 0. Names starting with
    ``encoder.layers`` map to one more than the integer found in the third
    dot-separated component. Every other name maps to ``num_layers``.
    """
    if name.startswith('image_adapter'):
        return 0
    if not name.startswith('encoder.layers'):
        return num_layers
    # e.g. "encoder.layers.<i>.<rest>": component 2 holds the block index.
    block_index = name.split('.')[2]
    return int(block_index) + 1
