(model, weight_decay=1e-5, skip_list=(), get_num_layer=None, get_layer_scale=None)
| 70 | |
| 71 | |
def get_parameter_groups(model, weight_decay=1e-5, skip_list=(), get_num_layer=None, get_layer_scale=None):
    """Split the trainable parameters of ``model`` into optimizer parameter groups.

    Every parameter with ``requires_grad`` set is placed in either a "decay"
    group (using ``weight_decay``) or a "no_decay" group (weight decay 0).
    A parameter goes to "no_decay" when it has at most one dimension, when its
    name ends with ".bias", or when its name is listed in ``skip_list``.
    If ``get_num_layer`` is given, groups are additionally split per layer id
    and named "layer_<id>_decay" / "layer_<id>_no_decay".

    Args:
        model: Object exposing ``named_parameters()`` that yields
            ``(name, param)`` pairs; each ``param`` must provide
            ``requires_grad`` and ``shape``.
        weight_decay: Weight decay value stored on the "decay" groups.
        skip_list: Container of parameter names that must never be decayed.
        get_num_layer: Optional callable mapping a parameter name to an
            integer layer id (it is formatted with ``%d``).
        get_layer_scale: Optional callable mapping a layer id to the value
            stored under "lr_scale". It is called once per group, when the
            group is first created, and receives ``None`` as the layer id
            when ``get_num_layer`` is not supplied.

    Returns:
        A list of dicts with the keys "weight_decay", "params" and
        "lr_scale", one per group, in order of first appearance. "params"
        holds the parameter objects themselves.

    Side effects:
        Prints a JSON summary of the groups (parameter names instead of
        parameter objects) to stdout.
    """
    parameter_group_names = {}  # group name -> settings + parameter *names* (for logging)
    parameter_group_vars = {}   # group name -> settings + parameter *objects* (returned)

    for name, param in model.named_parameters():
        if not param.requires_grad:
            continue  # frozen weights

        # `<= 1` rather than `== 1`: 0-dim (scalar) parameters must be exempt
        # from weight decay just like 1-D biases and norm weights.
        if len(param.shape) <= 1 or name.endswith(".bias") or name in skip_list:
            group_name = "no_decay"
            this_weight_decay = 0.
        else:
            group_name = "decay"
            this_weight_decay = weight_decay

        layer_id = None
        if get_num_layer is not None:
            layer_id = get_num_layer(name)
            group_name = "layer_%d_%s" % (layer_id, group_name)

        if group_name not in parameter_group_names:
            scale = get_layer_scale(layer_id) if get_layer_scale is not None else 1.

            # Both registries get their own dict (and their own "params" list)
            # with identical settings.
            for registry in (parameter_group_names, parameter_group_vars):
                registry[group_name] = {
                    "weight_decay": this_weight_decay,
                    "params": [],
                    "lr_scale": scale,
                }

        parameter_group_vars[group_name]["params"].append(param)
        parameter_group_names[group_name]["params"].append(name)

    print("Param groups = %s" % json.dumps(parameter_group_names, indent=2))
    return list(parameter_group_vars.values())
| 112 | |
| 113 | |
| 114 | def create_optimizer(args, model, get_num_layer=None, get_layer_scale=None, filter_bias_and_bn=True, skip_list=None): |
no test coverage detected