import torch
import sys
import os

from find_high_accuracy_path_v2.find_parameters import ParameterMove, ParameterTrain, ParameterRebuildNorm, ParameterGeneral
from find_high_accuracy_path_v2.runtime_parameters import RuntimeParameters

sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from py_src.ml_setup import MlSetup

# Name of the model this configuration file targets; every getter below
# compares ``ml_setup.model_name`` against it before returning parameters.
model_name = 'shufflenet_v2'

def get_parameter_general(runtime_parameter: RuntimeParameters, ml_setup: MlSetup):
    """Build the run-wide settings for the model this file configures.

    Args:
        runtime_parameter: Current runtime state; not read by this function.
        ml_setup: Setup object whose ``model_name`` must equal the
            module-level ``model_name``.

    Returns:
        A ``ParameterGeneral`` with ``max_tick``, ``dataloader_worker`` and
        ``test_dataset_use_whole`` filled in.

    Raises:
        NotImplementedError: If ``ml_setup.model_name`` is a different model.
    """
    if ml_setup.model_name != model_name:
        # ``NotImplemented`` is the binary-operator sentinel, not an exception;
        # raising it would surface as a confusing TypeError.
        raise NotImplementedError(
            f"no general parameters defined for model {ml_setup.model_name!r}"
        )

    output = ParameterGeneral()
    output.max_tick = 40000
    output.dataloader_worker = 8
    output.test_dataset_use_whole = True
    return output

# Names appended to every skip-keyword list. They match the buffer names
# PyTorch batch-norm layers register (statistics, not learnable weights).
_MOVE_ALWAYS_SKIPPED = ("running_mean", "running_var", "num_batches_tracked")

# (tick, skip keywords) pairs. A new entry takes effect every 2000 ticks and
# each one drops, or first splits into numbered sub-blocks and then drops, the
# earliest remaining keyword, until only the always-skipped names are left.
_MOVE_SKIP_SCHEDULE = (
    (0, ("stage2", "stage3", "stage4", "fc")),
    (2000, ("stage2.1", "stage2.2", "stage2.3", "stage3", "stage4", "fc")),
    (4000, ("stage2.2", "stage2.3", "stage3", "stage4", "fc")),
    (6000, ("stage2.3", "stage3", "stage4", "fc")),
    (8000, ("stage3", "stage4", "fc")),
    (10000, ("stage3.1", "stage3.2", "stage3.3", "stage3.4", "stage3.5", "stage3.6", "stage3.7", "stage4", "fc")),
    (12000, ("stage3.2", "stage3.3", "stage3.4", "stage3.5", "stage3.6", "stage3.7", "stage4", "fc")),
    (14000, ("stage3.3", "stage3.4", "stage3.5", "stage3.6", "stage3.7", "stage4", "fc")),
    (16000, ("stage3.4", "stage3.5", "stage3.6", "stage3.7", "stage4", "fc")),
    (18000, ("stage3.5", "stage3.6", "stage3.7", "stage4", "fc")),
    (20000, ("stage3.6", "stage3.7", "stage4", "fc")),
    (22000, ("stage3.7", "stage4", "fc")),
    (24000, ("stage4", "fc")),
    (26000, ("stage4.1", "stage4.2", "stage4.3", "fc")),
    (28000, ("stage4.2", "stage4.3", "fc")),
    (30000, ("stage4.3", "fc")),
    (32000, ("fc",)),
    (34000, ()),
)


def get_parameter_move(runtime_parameter: RuntimeParameters, ml_setup: MlSetup):
    """Return the move parameters that start applying at the current tick.

    New parameters are only issued on the ticks listed in
    ``_MOVE_SKIP_SCHEDULE``; on every other tick ``None`` is returned.
    All scheduled entries share the same step sizes and differ only in
    ``layer_skip_move_keyword``.
    NOTE(review): presumably layers whose names contain one of the skip
    keywords are excluded from the move step -- confirm against the consumer
    of ``ParameterMove``.

    Args:
        runtime_parameter: Current runtime state; only ``current_tick`` is read.
        ml_setup: Setup object whose ``model_name`` must equal the
            module-level ``model_name``.

    Returns:
        A freshly built ``ParameterMove`` when ``current_tick`` is a scheduled
        tick, otherwise ``None``.

    Raises:
        NotImplementedError: If ``ml_setup.model_name`` is a different model.
    """
    if ml_setup.model_name != model_name:
        # ``NotImplemented`` is the binary-operator sentinel, not an exception;
        # raising it would surface as a confusing TypeError.
        raise NotImplementedError(
            f"no move parameters defined for model {ml_setup.model_name!r}"
        )

    current_tick = runtime_parameter.current_tick
    # Compare with ``==`` (not a dict lookup) so tick matching behaves exactly
    # like the original if/elif chain regardless of the tick's numeric type.
    skip_keywords = next(
        (keywords for tick, keywords in _MOVE_SKIP_SCHEDULE if current_tick == tick),
        None,
    )
    if skip_keywords is None:
        return None

    output = ParameterMove()
    output.step_size = 0
    output.adoptive_step_size = 0.002
    output.layer_skip_move = []
    # Build a new list on every call so callers may mutate the result freely.
    output.layer_skip_move_keyword = [*skip_keywords, *_MOVE_ALWAYS_SKIPPED]
    output.merge_bias_with_weights = False
    return output


def get_parameter_train(runtime_parameter: RuntimeParameters, ml_setup: MlSetup):
    """Return the training parameters, which are issued once at tick 0.

    Args:
        runtime_parameter: Current runtime state; only ``current_tick`` is read.
        ml_setup: Setup object whose ``model_name`` must equal the
            module-level ``model_name``.

    Returns:
        A ``ParameterTrain`` when ``current_tick`` is 0, otherwise ``None``.

    Raises:
        NotImplementedError: If ``ml_setup.model_name`` is a different model.
    """
    if ml_setup.model_name != model_name:
        # ``NotImplemented`` is the binary-operator sentinel, not an exception;
        # raising it would surface as a confusing TypeError.
        raise NotImplementedError(
            f"no training parameters defined for model {ml_setup.model_name!r}"
        )
    if runtime_parameter.current_tick != 0:
        return None

    output = ParameterTrain()
    output.train_for_max_rounds = 20000
    output.train_for_min_rounds = 20
    output.train_until_loss = 0.03
    output.pretrain_optimizer = True
    output.load_existing_optimizer = False
    return output

def get_optimizer_train(runtime_parameter: RuntimeParameters, ml_setup: MlSetup, model_parameter):
    """Return the optimizer used for training, created once at tick 0.

    Args:
        runtime_parameter: Current runtime state; only ``current_tick`` is read.
        ml_setup: Setup object whose ``model_name`` must equal the
            module-level ``model_name``.
        model_parameter: Parameters handed straight to ``torch.optim.SGD``.

    Returns:
        A plain ``torch.optim.SGD`` (``lr=0.001``) when ``current_tick`` is 0,
        otherwise ``None``.

    Raises:
        NotImplementedError: If ``ml_setup.model_name`` is a different model.
    """
    if ml_setup.model_name != model_name:
        # ``NotImplemented`` is the binary-operator sentinel, not an exception;
        # raising it would surface as a confusing TypeError.
        raise NotImplementedError(
            f"no training optimizer defined for model {ml_setup.model_name!r}"
        )
    if runtime_parameter.current_tick != 0:
        return None
    return torch.optim.SGD(model_parameter, lr=0.001)

def get_parameter_rebuild_norm(runtime_parameter: RuntimeParameters, ml_setup: MlSetup):
    """Return the norm-rebuild parameters, which are issued once at tick 0.

    Every round limit and the loss target are set to 0 here.
    NOTE(review): presumably this turns the rebuild phase into a no-op --
    confirm against the consumer of ``ParameterRebuildNorm``.

    Args:
        runtime_parameter: Current runtime state; only ``current_tick`` is read.
        ml_setup: Setup object whose ``model_name`` must equal the
            module-level ``model_name``.

    Returns:
        A ``ParameterRebuildNorm`` when ``current_tick`` is 0, otherwise
        ``None``.

    Raises:
        NotImplementedError: If ``ml_setup.model_name`` is a different model.
    """
    if ml_setup.model_name != model_name:
        # ``NotImplemented`` is the binary-operator sentinel, not an exception;
        # raising it would surface as a confusing TypeError.
        raise NotImplementedError(
            f"no rebuild-norm parameters defined for model {ml_setup.model_name!r}"
        )
    if runtime_parameter.current_tick != 0:
        return None

    output = ParameterRebuildNorm()
    output.rebuild_norm_for_max_rounds = 0
    output.rebuild_norm_for_min_rounds = 0
    output.rebuild_norm_until_loss = 0
    output.rebuild_norm_layer = []
    output.rebuild_norm_layer_keyword = ['bn']
    return output

def get_optimizer_rebuild_norm(runtime_parameter: RuntimeParameters, ml_setup: MlSetup, model_parameter):
    """Return the optimizer used while rebuilding norm layers, created at tick 0.

    Args:
        runtime_parameter: Current runtime state; only ``current_tick`` is read.
        ml_setup: Setup object whose ``model_name`` must equal the
            module-level ``model_name``.
        model_parameter: Parameters handed straight to ``torch.optim.SGD``.

    Returns:
        A ``torch.optim.SGD`` with ``lr=0.001``, ``momentum=0.9`` and
        ``weight_decay=5e-4`` when ``current_tick`` is 0, otherwise ``None``.

    Raises:
        NotImplementedError: If ``ml_setup.model_name`` is a different model.
    """
    if ml_setup.model_name != model_name:
        # ``NotImplemented`` is the binary-operator sentinel, not an exception;
        # raising it would surface as a confusing TypeError.
        raise NotImplementedError(
            f"no rebuild-norm optimizer defined for model {ml_setup.model_name!r}"
        )
    if runtime_parameter.current_tick != 0:
        return None
    return torch.optim.SGD(model_parameter, lr=0.001, momentum=0.9, weight_decay=5e-4)