# Experiment configuration, grouped into three sections:
#   'var_ranges' -- sizes / value sets of the variables C2, C1, C0, X and Y
#   'graph'      -- conditional probability tables and the optimal policy
#   'eval'       -- hyperparameters for the evaluation runs
params = dict(
    var_ranges=dict(
        range_C2=5,  # C2 takes values in [0, ..., 4]
        range_C1=4,  # C1 takes range_C1 values
        range_C0=3,  # C0 takes range_C0 values
        range_x=4,   # X takes range_x values
        domain_y=[-1, 1, 4],  # the set of values Y can take
    ),
    graph=dict(
        # Optimal policy: optimal_actions[(c2, c1)] = phi*((c2, c1)).
        # The action is 3 for every (c2, c1) pair, so the table is generated
        # over c2 in 0..4 and c1 in 0..3 instead of being spelled out.
        optimal_actions={(c2, c1): 3 for c2 in range(5) for c1 in range(4)},
        # c2_distr[i] = P(c2 = i)
        c2_distr=[0.4, 0.15, 0.15, 0.15, 0.15],
        # c1_distr[i] = P(c1 = i)
        c1_distr=[0.25, 0.25, 0.25, 0.25],
        # c0_distr[i] = P(c0 | c2 = i); one row per value of c2
        c0_distr=[
            [0.4, 0.4, 0.2],    # c2 = 0
            [0.9, 0.05, 0.05],  # c2 = 1
            [0.6, 0.3, 0.1],    # c2 = 2
            [1, 0, 0],          # c2 = 3
            [0.45, 0.45, 0.1],  # c2 = 4
        ],
        # y_distr[i][j][k] = P(y | c1 = i, c0 = j, x = k).
        # Each innermost list is a distribution over the three Y values
        # (presumably in the same order as domain_y -- confirm against the consumer).
        # The tables for c1 = 0 and c1 = 1 hold the same numbers, as do those
        # for c1 = 2 and c1 = 3; they are kept as separate lists on purpose.
        y_distr=[
            [  # c1 = 0
                [[1, 0, 0], [0.75, 0.25, 0], [0.5, 0.5, 0], [0.25, 0.75, 0]],  # c0 = 0
                [[0.75, 0.25, 0], [0.5, 0.5, 0], [0.25, 0.75, 0], [0, 1, 0]],  # c0 = 1
                [[0, 0.75, 0.25], [0, 0.5, 0.5], [0, 0.25, 0.75], [0, 0, 1]],  # c0 = 2
            ],
            [  # c1 = 1
                [[1, 0, 0], [0.75, 0.25, 0], [0.5, 0.5, 0], [0.25, 0.75, 0]],  # c0 = 0
                [[0.75, 0.25, 0], [0.5, 0.5, 0], [0.25, 0.75, 0], [0, 1, 0]],  # c0 = 1
                [[0, 0.75, 0.25], [0, 0.5, 0.5], [0, 0.25, 0.75], [0, 0, 1]],  # c0 = 2
            ],
            [  # c1 = 2
                [[0.9, 0, 0.1], [0.8, 0.1, 0.1], [0.6, 0.3, 0.1], [0.35, 0.55, 0.1]],    # c0 = 0
                [[0.85, 0.05, 0.1], [0.6, 0.3, 0.1], [0.35, 0.55, 0.1], [0.1, 0.8, 0.1]],  # c0 = 1
                [[0.1, 0.55, 0.35], [0.1, 0.3, 0.6], [0.1, 0.05, 0.85], [0.1, 0, 0.9]],  # c0 = 2
            ],
            [  # c1 = 3
                [[0.9, 0, 0.1], [0.8, 0.1, 0.1], [0.6, 0.3, 0.1], [0.35, 0.55, 0.1]],    # c0 = 0
                [[0.85, 0.05, 0.1], [0.6, 0.3, 0.1], [0.35, 0.55, 0.1], [0.1, 0.8, 0.1]],  # c0 = 1
                [[0.1, 0.55, 0.35], [0.1, 0.3, 0.6], [0.1, 0.05, 0.85], [0.1, 0, 0.9]],  # c0 = 2
            ],
        ],
    ),
    eval=dict(
        # All five lists have one entry per value of T (7 values).
        train_runs_list=[50] * 7,  # number of runs for each value of T
        test_runs_per_train_run_list=[1] * 7,  # keep every entry at 1
        train_T_list=[16, 24, 32, 40, 48, 56, 64],  # the values of T
        # Per the original note: number of evaluation-phase runs for each
        # training-phase run, used to get lower-variance estimates.
        test_T_list=[50] * 7,
        # Alpha for each value of T. Per the original note this does not matter
        # for this experiment (Experiment 3), since the code reportedly uses
        # alpha=0 there; see the main paper discussion for details.
        alpha_list=[1 / 2] * 7,
    ),
)

# Dump the assembled configuration to stdout.
print(params)