import os
import sys
import tempfile
import json
# import argparse
import azureml
from azureml.core import Workspace, Experiment, Environment
from azureml.core.compute import ComputeTarget
from azureml.core import ScriptRunConfig, RunConfiguration
from azureml.core.runconfig import DockerConfiguration
from azureml.contrib.core.gjdrunconfig import GlobalJobDispatcherConfiguration
from azureml.core.conda_dependencies import CondaDependencies  # from azureml.widgets import RunDetails
from datetime import datetime
def parser():
    """Build the default settings used when submitting a job.

    Returns:
        easydict.EasyDict: attribute-accessible mapping with the keys
        ``use_gjd``, ``region``, ``cpu``, ``exp_name``, ``info``,
        ``nb_repro``, ``train_config``, ``checkpoint_path`` and
        ``checkpoint_dir``.
    """
    # Imported inside the function, so easydict is only needed when it is called.
    from easydict import EasyDict

    defaults = dict(
        use_gjd=True,  # selects the global-job-dispatcher branch in the script
        region="westus2",
        cpu=False,
        # Other experiment names noted by the author:
        # PLC_pytorch / PLC_synthesize / PLC_challenge / PLC_multi_task
        exp_name="zhijun_pytorch",
        info="ar libritts tfcodec 16",
        nb_repro=1,  # number of times the same run is submitted
        train_config="config.yaml",
        checkpoint_path=None,
        checkpoint_dir=None,
    )
    return EasyDict(defaults)

if __name__ == '__main__':
    import argparse

    # Command-line overrides layered on top of the defaults from parser().
    external_parser = argparse.ArgumentParser(description='Process command line parameters.')
    # The default is None so that an omitted --info keeps the built-in run
    # description instead of replacing it.
    external_parser.add_argument('--info', type=str, default=None,
                        help='info of experiments')
    external_parser.add_argument('--config', type=str, default=None,
                        help='config')
    ext_args = external_parser.parse_args()
    # NOTE(review): --config is parsed but nothing in this script consumes it yet.
    exp_config = ext_args.config

    args = parser()
    if ext_args.info is not None:
        # --info used to be parsed and then discarded (args.info was always
        # overwritten with a hard-coded string); honour it when it is given.
        args.info = ext_args.info

    # Lookup chain: region -> workspace name -> datastore / virtual-cluster name.
    DATASTORE_DICT = {"packet_loss_concealment": 'zhijun_data'}  # the datastore must be registered on the workspace first
    REGION_DICT = {"westus2": 'packet_loss_concealment'}
    VC_DICT = {"packet_loss_concealment": 'Westus2.PacketLossConcealment2',}
    workspace_name = REGION_DICT[args.region]
    datastore_name = DATASTORE_DICT[workspace_name]
    vc_name = VC_DICT[workspace_name]
    
    # The container-registry password must never live in source control.
    # It is read from the ACR_PASSWORD environment variable; fail before any
    # remote call if it is missing.
    # NOTE(review): the password that was previously committed here should be
    # rotated, since it remains in the repository history.
    registry_password = os.environ.get("ACR_PASSWORD")
    if not registry_password:
        raise SystemExit(
            "ACR_PASSWORD is not set; export the password for the "
            "'msrresrchcr.azurecr.io' container registry before submitting."
        )

    # from azureml.core.authentication import InteractiveLoginAuthentication
    # InteractiveLoginAuthentication(force=True)
    # init workspace
    ws = Workspace(
        subscription_id="f1f491ac-0340-4e5d-87b7-47692be1cb31",
        resource_group="IC3_Common_GPU_cluster",
        workspace_name=workspace_name
    )
    # increase shared memory. NB: must be set before the env def
    # azureml.core.environment._DEFAULT_SHM_SIZE = '10000g'  # enlarge for lmdb

    # init environment: a user-managed Docker image pulled from a private registry
    # (alternative: env = Environment.get(ws, "valle-4-23-noconda-v2"))
    env = Environment(name="valle-4-23-noconda-v9")
    env.docker.enabled = True
    env.docker.base_image = "jiazhijun/valle-4-23-noconda:v9"
    env.docker.base_image_registry.address = 'msrresrchcr.azurecr.io'
    env.docker.base_image_registry.username = 'msrresrchcr'
    env.docker.base_image_registry.password = registry_password
    # specifying your own Python interpreter:
    # https://docs.microsoft.com/en-us/azure/machine-learning/how-to-use-environments#specify-your-own-python-interpreter
    env.python.user_managed_dependencies = True
    # env.python.interpreter_path = "/usr/bin/python3"
    env.python.interpreter_path = "/opt/conda/bin/python"
    env.register(workspace=ws)
    env.build(ws)
    # import pdb; pdb.set_trace()
    # Setup the run_config using a dict
    run_config_struct = {
        "node_count": 1,
        "environment": env._serialize_to_dict(env),
        # A blank target is used for global-job-dispatcher submissions;
        # otherwise the cluster is named explicitly below.
        "target": " ",
    }
    if not args.use_gjd:
        run_config_struct["target"] = "evalNC24" if args.cpu else "PrimaryTrain"

    # Serialize the dict to a JSON runconfig file and load it back. The file is
    # only a vehicle for RunConfiguration.load, so it lives in a temporary
    # directory that is removed afterwards (mkdtemp() left one behind per run).
    with tempfile.TemporaryDirectory() as project_dir:
        amlcompute_dir = os.path.join(project_dir, ".amlcompute")
        os.makedirs(amlcompute_dir)
        run_config_path = os.path.join(amlcompute_dir, "simple.runconfig")
        with open(run_config_path, "w") as outfile:
            json.dump(run_config_struct, outfile)
        run_config = RunConfiguration.load(run_config_path)

    # docker
    run_config.docker = DockerConfiguration(use_docker=True, shared_volumes=True, shm_size='2000g')
    # Decide where the job runs.
    compute_target = None  # stays None when the global job dispatcher is used
    if args.use_gjd:
        # Enable global job dispatcher to leverage idle compute resources from other workspaces
        # NOTE(review): vc_name already starts with "Westus2.", so the composed
        # name contains "Westus2." twice — confirm this is intended.
        # (earlier template: "Microsoft.IC3.PrimaryTrain.{}")
        vc_list = ["Microsoft.IC3.Unified.Westus2.{}".format(vc_name)]
        dispatcher = GlobalJobDispatcherConfiguration(
            compute_type="Amlcompute",
            region=[],
            vc_block_list=vc_list,
        )
        run_config.global_job_dispatcher = dispatcher
    else:
        # Only use a specific named cluster in a single workspace
        compute_target = "evalNC24" if args.cpu else "PrimaryTrain"
    # To resolve the cluster object instead of passing its name:
    # compute_target = ComputeTarget(workspace=ws, name="evalNC24")

      #     --manifest-dir /mnt/shared/LibriTTS/data_valle/data/tokenized/ 
      # --text-tokens /mnt/shared/LibriTTS/data_valle/data/tokenized/unique_text_tokens.k2symbols 
      # --exp-dir /mnt/shared/LibriTTS/data_valle/data/output/$${dir_name}_$${basestr}
       #---------PLC data------------
    # data_ref = ws.datastores[datastore_name].path("PLC/data_folder/PLC/tfrecord_4s_half_frm_pl_JBC_lmdb_2/librivox600hrs_random_pl_10_20_30_40_50_markov_0_train_399_20.0ms_shift_10.0ms_drop_0.0ms.lmdb").as_mount() # for offline data    
    # val_ref = ws.datastores[datastore_name].path("PLC/data_folder/PLC/tfrecord_4s_half_frm_pl_JBC_lmdb_2/val_librivox600hrs_random_pl_10_20_30_40_50_markov_0_train_2999_20.0ms_shift_10.0ms_drop_0.0ms.lmdb").as_mount() # for offline data
    # data_ref = ws.datastores[datastore_name].path("librivox600hrs_random_pl_10_20_30_40_50_markov_0_train_399_20.0ms_shift_10.0ms_drop_0.0ms.lmdb").as_mount() # for offline data    
    # val_ref = ws.datastores[datastore_name].path("val_librivox600hrs_random_pl_10_20_30_40_50_markov_0_train_2999_20.0ms_shift_10.0ms_drop_0.0ms.lmdb").as_mount() # for offline data
    # train_ref = ws.datastores[datastore_name].path("hesam_train/tfnetv4_baseline_reproduce").as_mount()
    
    # ---- Hyper-parameters forwarded to the training script via arg_list ----
    name = "VALLE"  # only used when composing dir_name
    max_duration = 50
    dtype = "float32"
    base_lr = 0.01
    world_size = 8
    echo = 60  # forwarded as --num-epochs
    train_stage = 1
    start_epoch = 1
    accumulate_grad_steps = 4
    prefix_mode = 1
    input_semantic = "True"
    only_autoregressive = "True"
    num_quantizers = 1

    sheduler_steps = 5000
    sheduler_epochs = 4

    semantic_depup = "False"
    semantic_remove = "True"
    is_pretrain = "True"
    pret_mode = 7  # 0 mask 1 del 2 infilling
    pret_prob = 0.3
    pret_lam = 3
    pret_token = 500
    decoder_dim = 1024
    nhead = 16
    num_decoder_layers = 12

    # Descriptive directory name derived from the settings above. It is not
    # used by the pinned exp_dir below; it is kept for the dynamic variant.
    dir_name=f"pret_{name}_m-dur_{max_duration}_dtp_{dtype}_b-lr_{base_lr}_t-stg_{train_stage}_eo_{echo}_s_eo_{start_epoch}_p_md_{prefix_mode}_i_seman_{input_semantic}_o_ar_{only_autoregressive}_n_qua_{num_quantizers}_s_seps_{sheduler_steps}_s_epoc_{sheduler_epochs}_a_g_s_{accumulate_grad_steps}_s_depup_{semantic_depup}_s_rmv_{semantic_remove}_mode_{pret_mode}_p_{pret_prob}_lam_{pret_lam}_tok_{pret_token}_dim_{decoder_dim}_n_{nhead}_n_d_lrs_{num_decoder_layers}"
    # https://resrchvc4data.blob.core.windows.net/v-zhijun/data/l1_l2_arctic/lhotse_data_vc/tokenized/tokenized_24k_layer9/cuts_dev.jsonl.gz

    # Look the datastore up once and reuse it for every mount below.
    datastore = ws.datastores[datastore_name]

    # The experiment directory is pinned to a fixed, timestamped folder.
    # NOTE(review): presumably this continues an earlier run — confirm. The
    # pinned name says "eo_20" while echo is 60 above.
    # Dynamic variant:
    # exp_dir = datastore.path(f"data/LibriTTS/lhotse_vc/output_vc/{dir_name}_{timestamp}").as_mount()
    exp_dir = datastore.path("data/LibriTTS/lhotse_vc/output_vc/pret_VALLE_m-dur_50_dtp_float32_b-lr_0.01_t-stg_1_eo_20_s_eo_1_p_md_1_i_seman_True_o_ar_True_n_qua_1_s_seps_5000_s_epoc_4_a_g_s_4_s_depup_False_s_rmv_True_mode_7_p_0.3_lam_3_tok_500_dim_1024_n_16_n_d_lrs_12_2023_08_31_05_43_50").as_mount()

    # Submission timestamp; printed and forwarded as --newfile-suffix.
    current_time = datetime.now()
    timestamp = current_time.strftime("%Y_%m_%d_%H_%M_%S")

    manifest_dir = datastore.path("data/LibriTTS/lhotse_vc/vc_tokenized_16k_tfcodec_16codes").as_mount()
    text_tokens = datastore.path("data/LibriTTS/lhotse_vc/vc_tokenized_16k_tfcodec_16codes/unique_text_tokens.k2symbols").as_mount()
    semantic_tokens = datastore.path("data/LibriTTS/lhotse_vc/vc_tokenized_16k_tfcodec_16codes/unique_semantic_tokens.k2symbols").as_mount()

    # Placeholders that are not wired into the submission at the moment.
    warmup_ref = None
    # ckpt_ref = datastore.path("${pretrained_ckpt_path}").as_mount()
    ckpt_ref = None
    checkpoint_dir_ref = None
     
    print(f"timestamp : {timestamp}")
    # Command-line arguments handed to the training entry script, as a flat
    # [flag, value, flag, value, ...] list. Flag spellings (including
    # "sheduler") are kept exactly as written: they are runtime strings that
    # presumably must match what the training script defines.
    arg_list = [
        "--nproc-per-node", world_size,
        "--nnodes", 1,
        "--max-duration", max_duration,
        "--filter-min-duration", 0.5,
        "--filter-max-duration", 14,
        "--train-stage", train_stage,
        "--num-buckets", 6,
        "--dtype", dtype,
        "--save-every-n", 20000000000,
        "--log-interval", 500,
        "--valid-interval", 2000,
        "--model-name", "valle",
        "--share-embedding", True,
        "--norm-first", True,
        "--add-prenet", False,
        "--decoder-dim", decoder_dim,
        "--nhead", nhead,
        "--num-decoder-layers", num_decoder_layers,
        "--num-quantizers", num_quantizers,
        "--prefix-mode", prefix_mode,
        "--base-lr", base_lr,
        "--warmup-steps", 200,
        "--average-period", 0,
        "--num-epochs", echo,
        "--start-epoch", start_epoch,
        "--start-batch", 0,
        "--accumulate-grad-steps", accumulate_grad_steps,
        "--world-size", world_size,
        # Datastore mounts are passed as their string form.
        "--manifest-dir", str(manifest_dir),
        "--text-tokens", str(text_tokens),
        "--semantic-tokens", str(semantic_tokens),
        "--exp-dir", str(exp_dir),
        "--newfile-suffix", str(timestamp),
        "--is-local", False,
        "--input-semantic", str(input_semantic),
        "--only-autoregressive", str(only_autoregressive),
        "--sheduler-steps", sheduler_steps,
        "--sheduler-epochs", sheduler_epochs,
        "--semantic-depup", semantic_depup,
        "--semantic-remove", semantic_remove,
        # Uses the is_pretrain setting instead of a second hard-coded "True",
        # so changing the variable actually reaches the training script.
        "--is-pretrain", is_pretrain,
        "--pret-mode", pret_mode,
        "--pret-prob", pret_prob,
        "--pret-lam", pret_lam,
        "--pret-token", pret_token,
    ]
    # estimator: bundles the source directory, entry script, arguments and run config
    src_dir = "./"  # parent dir
    estimator = ScriptRunConfig(
        source_directory=src_dir,
        script="egs/libritts/bin/multiprocess_caller.py",
        run_config=run_config,
        arguments=arg_list,
        compute_target=compute_target,
    )
    # Blank the target only for global-job-dispatcher submissions. Doing it
    # unconditionally discarded the cluster chosen for the non-GJD branches
    # ("evalNC24" / "PrimaryTrain").
    if args.use_gjd:
        estimator.run_config.target = " "

    # Register every datastore mount referenced in arg_list with the run config.
    for data_ref in (manifest_dir, text_tokens, semantic_tokens, exp_dir):
        estimator.run_config.data_references[data_ref.data_reference_name] = data_ref.to_config()

    
    # Open the experiment that groups the submitted runs.
    exp = Experiment(name=args.exp_name, workspace=ws)

    # Submit the same configuration args.nb_repro times, tagging each run with
    # the description plus its repetition index ("<info> r<i>").
    for i in range(args.nb_repro):
        tag_text = "{} r{}".format(args.info, i)
        info_tag = dict(info=tag_text)
        run = exp.submit(estimator, tags=info_tag)
        # RunDetails(run).show()
        run_details = run.get_details()
        print(run_details)
        # last_run = list(exp.get_runs())[0]  
