
import numpy as np
from sklearn.model_selection import ShuffleSplit
from svsvr import SVSVC, Shapley_kernel

def precomputed_kernel_GridSearchCV(X, y, Cs, qds, n_splits=5, test_size=0.2, random_state=42):
    """Grid-search ``C`` and ``q_additivity`` for an SVSVC on precomputed kernels.

    For every ``q`` in ``qds`` the kernel matrix ``Shapley_kernel(X, X,
    q_additivity=q)`` is computed once. For every ``C`` in ``Cs`` an
    ``SVSVC(q_additivity=q, C=C)`` is then fitted and scored on
    ``n_splits`` shuffle-split partitions of that matrix. The pair with the
    highest mean score wins; ties keep the first pair encountered.

    Parameters
    ----------
    X : np.ndarray
        Samples; ``X.shape[0]`` is taken as the number of samples.
    y : array-like
        Labels, one per row of ``X``.
    Cs : iterable
        Candidate values of ``C``.
    qds : iterable
        Candidate values of ``q_additivity``.
    n_splits : int
        Number of shuffle-split iterations per candidate pair.
    test_size : float or int
        Forwarded to ``ShuffleSplit``.
    random_state : int, RandomState instance or None
        Forwarded to ``ShuffleSplit``.

    Returns
    -------
    tuple
        ``(best_C, best_q, best_K, best_score)`` where ``best_K`` is the
        full kernel matrix computed for ``best_q``. When no candidate can be
        selected (an empty grid, or no mean score greater than ``-inf``) the
        result is ``(None, None, None, float('-inf'))``.
    """
    n = X.shape[0]
    assert len(y) == n, "X and y must contain the same number of samples"

    # Positional fancy indexing below needs an array, not e.g. a plain list.
    y = np.asarray(y)
    # Cs is walked once per q, so a one-shot iterator would be exhausted
    # after the first q; materialise both grids up front.
    Cs = list(Cs)
    qds = list(qds)

    best_score = float('-inf')
    best_C = None
    best_q = None
    best_K = None

    # Nothing to evaluate: skip the kernel computation entirely.
    if not Cs or not qds:
        return best_C, best_q, best_K, best_score

    indices = np.arange(n)
    # The splitter only stores its configuration, so one instance can serve
    # every (q, C) pair; split() is still called once per pair as before.
    ss = ShuffleSplit(n_splits=n_splits, test_size=test_size, random_state=random_state)

    for q in qds:
        # The kernel depends only on q, so compute it once per q.
        K = Shapley_kernel(X, X, q_additivity=q)
        for C in Cs:
            # The score for a (q, C) pair is the mean over all splits.
            scores = []
            for train_index, test_index in ss.split(indices):
                # Train block: train rows x train columns.
                K_train = K[np.ix_(train_index, train_index)]
                # Test block: test rows against the training columns.
                K_test = K[np.ix_(test_index, train_index)]
                y_train = y[train_index]
                y_test = y[test_index]
                svc = SVSVC(q_additivity=q, C=C)
                svc.fit(K_train, y_train, type='kernel')
                scores.append(svc.score(K_test, y_test))
            mean_score = np.mean(scores)
            if mean_score > best_score:
                best_score = mean_score
                best_K = K
                best_C = C
                best_q = q
    return best_C, best_q, best_K, best_score