function lambda_choose = mean_iter_CV(data, theta_initial, values, K)
% MEAN_ITER_CV  Choose the candidate penalty with the best mean hold-out accuracy.
%
%   lambda_choose = mean_iter_CV(data, theta_initial, values, K)
%
%   For every candidate in VALUES the routine repeats a random hold-out
%   split 5 times. In each repetition, every source m = 1..M gives up
%   round(n_m/5) randomly chosen rows to a shared validation set; the
%   remaining rows form the training set. A class-weighted covariance
%   Sigma_1 is computed from source 1 (labels from column 2), and a second
%   one, Sigma, together with class means and class proportions, from all
%   M training sources stacked (labels from column 3). The estimate
%   returned by ISTA is then scored on the validation rows through
%   bayes_value, and the accuracies are averaged over the repetitions.
%
%   Inputs
%     data          - cell array with at least 3 columns. Rows 1..M, where
%                     M = size(data,1) - 1, are used; the last row is not
%                     touched here (presumably held back by the caller -
%                     TODO confirm).
%                       data{m,1}: n_m-by-p observation matrix
%                       data{m,2}: n_m labels in 1..K, compared against the
%                                  predictions and used for Sigma_1
%                       data{m,3}: n_m labels in 1..K used for the pooled
%                                  statistics (meaning of this second label
%                                  set is not visible here - TODO confirm)
%     theta_initial - matrix with p rows, conformable with the
%                     p-by-(K-1) mean-difference matrix it is combined with
%     values        - vector of candidate penalty values
%     K             - number of classes
%
%   Output
%     lambda_choose - the element of VALUES with the highest mean
%                     validation accuracy (the first one on ties)
%
%   Requires ISTA and bayes_value on the path.

    n_repeats   = 5;   % random hold-out repetitions per candidate
    holdout_div = 5;   % each source holds out round(n_m / holdout_div) rows

    if isempty(values)
        error('mean_iter_CV:emptyValues', ...
              'VALUES must contain at least one candidate.');
    end

    M = size(data, 1) - 1;
    if M < 1
        error('mean_iter_CV:tooFewSources', ...
              'DATA must have at least two rows (M = size(data,1) - 1 >= 1).');
    end
    p = size(data{1, 1}, 2);

    lambda_accuracy = zeros(1, length(values));
    for i = 1:length(values)
        lambda = values(i);
        cv_accuracy = zeros(1, n_repeats);

        for j = 1:n_repeats
            % ---- split every source into training and validation rows ----
            train = cell(M, 3);
            valid_X_parts = cell(M, 1);
            valid_Y_parts = cell(M, 1);
            for m = 1:M
                X_m  = data{m, 1};
                Y_m  = data{m, 2};
                BY_m = data{m, 3};
                % Force column labels so the vertical concatenations and the
                % element-wise comparison below work for row-vector input too.
                Y_m  = Y_m(:);
                BY_m = BY_m(:);

                n_m = size(X_m, 1);
                held_out = randperm(n_m, round(n_m / holdout_div));
                keep = true(n_m, 1);
                keep(held_out) = false;

                train{m, 1} = X_m(keep, :);
                train{m, 2} = Y_m(keep);
                train{m, 3} = BY_m(keep);
                valid_X_parts{m} = X_m(held_out, :);
                valid_Y_parts{m} = Y_m(held_out);
            end
            valid_X = vertcat(valid_X_parts{:});
            valid_Y = vertcat(valid_Y_parts{:});
            if isempty(valid_Y)
                % Without validation rows the accuracy would be NaN and no
                % candidate could be ranked.
                error('mean_iter_CV:emptyValidation', ...
                      'No validation rows were drawn; the sources are too small.');
            end

            % ---- Sigma_1: class-weighted covariance of source 1 ----
            Sigma_1 = mean_iter_CV_class_stats(train{1, 1}, train{1, 2}, K);

            % ---- Sigma, class means and proportions from all sources ----
            X_t = vertcat(train{:, 1});
            Y_t = vertcat(train{:, 3});
            [Sigma, mu_hat, class_counts] = mean_iter_CV_class_stats(X_t, Y_t, K);
            pi_hat = class_counts / sum(class_counts);

            % Mean differences of classes 2..K relative to class 1.
            delta_hat = mu_hat(:, 2:end) - mu_hat(:, 1);
            delta = (Sigma_1 - Sigma) * theta_initial + delta_hat;

            % ---- fit and score on the validation rows ----
            % NOTE(review): the 4th ISTA argument (0.01) is passed through
            % unchanged; its meaning is defined by ISTA - confirm there.
            theta_update = ISTA(Sigma_1, delta, lambda, 0.01);

            % Prepend a zero column for class 1.
            theta = [zeros(p, 1) theta_update];
            pred_value = bayes_value(valid_X, theta, mu_hat, pi_hat, K);
            [~, pred_class] = max(pred_value, [], 2);
            cv_accuracy(j) = mean(pred_class == valid_Y);
        end

        lambda_accuracy(i) = mean(cv_accuracy);
    end

    % max returns the first maximiser, matching the first-on-ties rule.
    [~, best] = max(lambda_accuracy);
    lambda_choose = values(best);
end

function [S, mu, counts] = mean_iter_CV_class_stats(X, y, K)
% Class-weighted covariance, per-class means and per-class counts.
%   S      = sum_k n_k * cov(X_k) / size(X,1), skipping classes with fewer
%            than two rows
%   mu     = p-by-K matrix of class means (NaN column for an empty class)
%   counts = 1-by-K vector of class sizes
    p = size(X, 2);
    y = y(:);
    S = zeros(p, p);
    mu = zeros(p, K);
    counts = zeros(1, K);
    for k = 1:K
        X_k = X(y == k, :);
        n_k = size(X_k, 1);
        counts(k) = n_k;
        mu(:, k) = mean(X_k, 1)';
        % cov() returns a scalar for a single row (treated as a vector of
        % observations) and NaN for empty input; either would corrupt the
        % p-by-p accumulator, so such classes contribute no scatter.
        if n_k >= 2
            S = S + n_k * cov(X_k);
        end
    end
    S = S / size(X, 1);
end