import torch
import sys
import os

from find_high_accuracy_path_v2.find_parameters import ParameterMove, ParameterTrain, ParameterRebuildNorm, ParameterGeneral
from find_high_accuracy_path_v2.runtime_parameters import RuntimeParameters

sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from py_src.ml_setup import MlSetup

# Model identifier this parameter file is written for; each function in this
# module compares it against ``ml_setup.model_name`` before returning values.
model_name = 'dla'

def get_parameter_general(runtime_parameter: RuntimeParameters, ml_setup: MlSetup):
    """Return the run-wide settings for the model handled by this file.

    Args:
        runtime_parameter: Current runtime state. Not read by this function;
            kept so the signature matches the other parameter getters.
        ml_setup: Setup object whose ``model_name`` must equal the
            module-level ``model_name``.

    Returns:
        A ``ParameterGeneral`` with ``max_tick``, ``dataloader_worker`` and
        ``test_dataset_use_whole`` filled in.

    Raises:
        NotImplementedError: If ``ml_setup.model_name`` is not the model this
            file is written for.
    """
    if ml_setup.model_name != model_name:
        # ``NotImplemented`` is a comparison sentinel, not an exception class;
        # raising it produces a confusing TypeError instead of this error.
        raise NotImplementedError(
            f"no general parameters defined for model '{ml_setup.model_name}'"
        )

    output = ParameterGeneral()
    output.max_tick = 9000
    output.dataloader_worker = 8
    output.test_dataset_use_whole = True
    return output

# Skip-keyword schedule for get_parameter_move: entry ``i`` takes effect at
# tick ``phase_time * i``. Each later phase removes (or narrows) the leading
# keyword of the previous one, so the skip list shrinks over the run.
# Tuples keep the shared table immutable; a fresh list is built per call.
_DLA_MOVE_SKIP_SCHEDULE = (
    # phase 0
    ('base.1', 'layer1', 'layer2', 'layer3', 'layer4', 'layer5', 'layer6', 'linear'),
    # phase 1: 'base.1' no longer skipped
    ('layer1', 'layer2', 'layer3', 'layer4', 'layer5', 'layer6', 'linear'),
    # phase 2: 'layer1' no longer skipped
    ('layer2', 'layer3', 'layer4', 'layer5', 'layer6', 'linear'),
    # phase 3: 'layer2' no longer skipped
    # (the original listed 'linear' twice here; the duplicate is dropped,
    # the set of keywords is unchanged)
    ('layer3', 'layer4', 'layer5', 'layer6', 'linear'),
    # phase 4: 'layer3' narrowed to its right_node / root sub-keywords
    ('layer3.right_node', 'layer3.root', 'layer4', 'layer5', 'layer6', 'linear'),
    # phase 5: 'layer3.right_node' no longer skipped
    ('layer3.root', 'layer4', 'layer5', 'layer6', 'linear'),
    # phase 6: 'layer3.root' no longer skipped
    ('layer4', 'layer5', 'layer6', 'linear'),
    # phase 7: move layer4.prev_root
    ('layer4.level_1', 'layer4.left_node', 'layer4.right_node', 'layer4.root',
     'layer5', 'layer6', 'linear'),
    # phase 8: move layer4.level_1
    ('layer4.left_node', 'layer4.right_node', 'layer4.root', 'layer5', 'layer6', 'linear'),
    # phase 9: move layer4.left_node
    ('layer4.right_node', 'layer4.root', 'layer5', 'layer6', 'linear'),
    # phase 10: move layer4.right_node
    ('layer4.root', 'layer5', 'layer6', 'linear'),
    # phase 11: move layer4.root
    ('layer5', 'layer6', 'linear'),
    # phase 12: move layer5.prev_root
    ('layer5.level_1', 'layer5.left_node', 'layer5.right_node', 'layer5.root',
     'layer6', 'linear'),
    # phase 13: move layer5.level_1
    ('layer5.left_node', 'layer5.right_node', 'layer5.root', 'layer6', 'linear'),
    # phase 14: move layer5.left_node
    ('layer5.right_node', 'layer5.root', 'layer6', 'linear'),
    # phase 15: move layer5.right_node
    ('layer5.root', 'layer6', 'linear'),
    # phase 16: move layer5.root
    ('layer6', 'linear'),
    # phase 17: move layer6.left_node
    ('layer6.right_node', 'layer6.root', 'linear'),
    # phase 18: move layer6.right_node
    ('layer6.root', 'linear'),
    # phase 19: move layer6.root
    ('linear',),
    # phase 20: move linear (only the always-skipped keywords remain)
    (),
)


def get_parameter_move(runtime_parameter: RuntimeParameters, ml_setup: MlSetup):
    """Return the move parameters that start at the current tick, if any.

    A new phase begins every 400 ticks (ticks 0, 400, ..., 8000). All phases
    share the same step sizes and differ only in ``layer_skip_move_keyword``,
    taken from ``_DLA_MOVE_SKIP_SCHEDULE`` plus three keywords that are
    skipped in every phase (``running_mean``, ``running_var``,
    ``num_batches_tracked``).

    Args:
        runtime_parameter: Runtime state; only ``current_tick`` is read.
        ml_setup: Setup object whose ``model_name`` must equal the
            module-level ``model_name``.

    Returns:
        A populated ``ParameterMove`` when ``current_tick`` is exactly the
        first tick of a phase, otherwise ``None``.

    Raises:
        NotImplementedError: If ``ml_setup.model_name`` is not the model this
            file is written for.
    """
    if ml_setup.model_name != model_name:
        # ``NotImplemented`` is a comparison sentinel, not an exception class;
        # raising it produces a confusing TypeError instead of this error.
        raise NotImplementedError(
            f"no move parameters defined for model '{ml_setup.model_name}'"
        )

    always_skipped_keywords = ['running_mean', 'running_var', 'num_batches_tracked']
    phase_time = 400
    adoptive_step_size = 0.001
    ratio_step_size = 0.002

    for phase_index, skip_keywords in enumerate(_DLA_MOVE_SKIP_SCHEDULE):
        # Exact equality on purpose: parameters are only emitted on the tick
        # a phase starts, every other tick yields None.
        if runtime_parameter.current_tick == phase_time * phase_index:
            output = ParameterMove()
            output.step_size = 0
            output.adoptive_step_size = adoptive_step_size
            output.ratio_step_size = ratio_step_size
            output.layer_skip_move = []
            output.layer_skip_move_keyword = list(skip_keywords) + always_skipped_keywords
            output.merge_bias_with_weights = False
            return output
    return None


def get_parameter_train(runtime_parameter: RuntimeParameters, ml_setup: MlSetup):
    """Return the training parameters, which are only issued at tick 0.

    Args:
        runtime_parameter: Runtime state; only ``current_tick`` is read.
        ml_setup: Setup object whose ``model_name`` must equal the
            module-level ``model_name``.

    Returns:
        A populated ``ParameterTrain`` when ``current_tick == 0``, otherwise
        ``None``.

    Raises:
        NotImplementedError: If ``ml_setup.model_name`` is not the model this
            file is written for.
    """
    if ml_setup.model_name != model_name:
        # ``NotImplemented`` is a comparison sentinel, not an exception class;
        # raising it produces a confusing TypeError instead of this error.
        raise NotImplementedError(
            f"no train parameters defined for model '{ml_setup.model_name}'"
        )
    if runtime_parameter.current_tick != 0:
        return None

    output = ParameterTrain()
    output.train_for_max_rounds = 1000
    output.train_for_min_rounds = 100
    output.train_until_loss = 0.005
    output.pretrain_optimizer = False
    output.load_existing_optimizer = False
    return output

def get_optimizer_train(runtime_parameter: RuntimeParameters, ml_setup: MlSetup, model_parameter):
    """Build the training optimizer, which is only issued at tick 0.

    Args:
        runtime_parameter: Runtime state; only ``current_tick`` is read.
        ml_setup: Setup object whose ``model_name`` must equal the
            module-level ``model_name``.
        model_parameter: Iterable of parameters; each element becomes its own
            SGD parameter group.

    Returns:
        A ``torch.optim.SGD`` instance when ``current_tick == 0``, otherwise
        ``None``.

    Raises:
        NotImplementedError: If ``ml_setup.model_name`` is not the model this
            file is written for.
    """
    if ml_setup.model_name != model_name:
        # ``NotImplemented`` is a comparison sentinel, not an exception class;
        # raising it produces a confusing TypeError instead of this error.
        raise NotImplementedError(
            f"no train optimizer defined for model '{ml_setup.model_name}'"
        )
    if runtime_parameter.current_tick != 0:
        return None

    base_lr = 0.001
    # One parameter group per parameter instead of a single shared group.
    # NOTE(review): presumably so the learning rate can be adjusted per
    # parameter elsewhere -- confirm against the caller.
    return torch.optim.SGD(
        [{'params': param, 'lr': base_lr} for param in model_parameter]
    )

def get_parameter_rebuild_norm(runtime_parameter: RuntimeParameters, ml_setup: MlSetup):
    """Return the rebuild-norm parameters, which are only issued at tick 0.

    Every round count and the loss target are set to 0 and both layer lists
    are empty.

    Args:
        runtime_parameter: Runtime state; only ``current_tick`` is read.
        ml_setup: Setup object whose ``model_name`` must equal the
            module-level ``model_name``.

    Returns:
        A populated ``ParameterRebuildNorm`` when ``current_tick == 0``,
        otherwise ``None``.

    Raises:
        NotImplementedError: If ``ml_setup.model_name`` is not the model this
            file is written for.
    """
    if ml_setup.model_name != model_name:
        # ``NotImplemented`` is a comparison sentinel, not an exception class;
        # raising it produces a confusing TypeError instead of this error.
        raise NotImplementedError(
            f"no rebuild-norm parameters defined for model '{ml_setup.model_name}'"
        )
    if runtime_parameter.current_tick != 0:
        return None

    output = ParameterRebuildNorm()
    output.rebuild_norm_for_max_rounds = 0
    output.rebuild_norm_for_min_rounds = 0
    output.rebuild_norm_until_loss = 0
    output.rebuild_norm_layer = []
    output.rebuild_norm_layer_keyword = []
    return output

def get_optimizer_rebuild_norm(runtime_parameter: RuntimeParameters, ml_setup: MlSetup, model_parameter):
    """Build the rebuild-norm optimizer, which is only issued at tick 0.

    Args:
        runtime_parameter: Runtime state; only ``current_tick`` is read.
        ml_setup: Setup object whose ``model_name`` must equal the
            module-level ``model_name``.
        model_parameter: Iterable of parameters; each element becomes its own
            SGD parameter group.

    Returns:
        A ``torch.optim.SGD`` instance (momentum 0.9, weight decay 5e-4) when
        ``current_tick == 0``, otherwise ``None``.

    Raises:
        NotImplementedError: If ``ml_setup.model_name`` is not the model this
            file is written for.
    """
    if ml_setup.model_name != model_name:
        # ``NotImplemented`` is a comparison sentinel, not an exception class;
        # raising it produces a confusing TypeError instead of this error.
        raise NotImplementedError(
            f"no rebuild-norm optimizer defined for model '{ml_setup.model_name}'"
        )
    if runtime_parameter.current_tick != 0:
        return None

    base_lr = 0.001
    # One parameter group per parameter instead of a single shared group.
    # NOTE(review): presumably so the learning rate can be adjusted per
    # parameter elsewhere -- confirm against the caller.
    return torch.optim.SGD(
        [{'params': param, 'lr': base_lr} for param in model_parameter],
        momentum=0.9, weight_decay=5e-4
    )

