import random
import numpy as np
from codebleu.syntax_match import calc_syntax_match


def parent_selection(pop, m, multi_obj=False):
    """Draw ``m`` parents from ``pop``.

    Args:
        pop: Sequence of individuals to sample from.
            NOTE(review): the single-objective weights only depend on the
            position in ``pop``, so this presumably expects ``pop`` to be
            sorted best-first -- confirm against the caller.
        m: Number of parents to draw.
        multi_obj: When true, sample without replacement using the
            probabilities from ``dominance_dissimilarity_probs``; otherwise
            sample with replacement using position-based weights.

    Returns:
        The selected parents: a list in the single-objective case, and
        whatever ``np.random.choice`` returns (a NumPy array) in the
        multi-objective case.
    """
    if not multi_obj:
        pop_size = len(pop)
        # Weight shrinks with position; random.choices normalizes internally,
        # so the weights need not sum to 1.
        weights = [1 / (position + 1 + pop_size) for position in range(pop_size)]
        return random.choices(pop, weights=weights, k=m)

    selection_probs = dominance_dissimilarity_probs(pop)
    return np.random.choice(pop, m, replace=False, p=selection_probs)

def reduc_selection(reduc_pop, m):
    """Draw ``m`` reductions weighted by their ``'objective'`` value.

    NOTE: still single-obj for now (i.e., reductions yielding lower cost are
    more likely to be selected).

    Weighting (softmax is too strict, so this simpler formula is used):
      * negative objective -> ``1 / abs(objective)`` (assumes the objective
        is a negated cost, e.g. -cost as in TSP / CVRP);
      * positive objective -> the objective itself;
      * zero objective     -> weight 0.

    Reductions whose objective is ``None`` or NaN are never sampled but are
    always appended to the result, so the returned list can be longer than
    ``m``.

    If no reduction has a positive weight, sampling falls back to a uniform
    draw over the valid (non-None, non-NaN) reductions. If there are no valid
    reductions either, only the invalid ones are returned. This avoids the
    exception ``random.choices`` raises for an empty population or an
    all-zero weight vector.

    Args:
        reduc_pop: Sequence of dicts, each with an ``'objective'`` entry.
        m: Number of weighted samples to draw (with replacement).

    Returns:
        A list with the sampled reductions followed by every invalid one.
    """
    weights = [0.0] * len(reduc_pop)
    valid_reducs = []
    invalid_reducs = []
    for idx, reduc in enumerate(reduc_pop):
        fitness = reduc['objective']
        if fitness is None or np.isnan(fitness):
            invalid_reducs.append(reduc)
            continue
        valid_reducs.append(reduc)
        if fitness < 0:
            weights[idx] = 1 / abs(fitness)
        elif fitness > 0:
            weights[idx] = fitness

    if any(weight > 0 for weight in weights):
        parents = random.choices(reduc_pop, weights=weights, k=m)
    elif valid_reducs:
        # Every valid objective is 0 (or yields a 0 weight): nothing to rank
        # by, so pick uniformly instead of crashing on all-zero weights.
        parents = random.choices(valid_reducs, k=m)
    else:
        parents = []

    parents.extend(invalid_reducs)  # has share of 1 even if nan fitness
    return parents


def dominance_dissimilarity_probs(pop):
    """Turn pairwise dominance relations into selection probabilities.

    Each individual is scored as ``[objective, -runtime]`` (higher is better
    on both axes). For every pair where one individual is at least as good on
    both axes, the dominated one receives a penalty equal to the negated
    ``calc_syntax_match`` value between the two code strings.
    NOTE(review): presumably that value is a similarity in [0, 1], so being
    dominated by similar code is penalized more -- confirm against codebleu.

    When two individuals have identical scores, the one with the lower index
    is treated as the dominator.

    Args:
        pop: Sequence of dicts with ``'objective'``, ``'runtime'`` and
            ``'code'`` entries.

    Returns:
        A 1-D NumPy array with one probability per individual: the softmax of
        each individual's summed penalties.
    """
    n = len(pop)
    # penalties[a, b] is the penalty individual b receives for being dominated by a.
    penalties = np.zeros((n, n))
    for i in range(n - 1):
        first = pop[i]
        first_score = np.array([first['objective'], -first['runtime']])  # the higher score the better
        for j in range(i + 1, n):
            second = pop[j]
            second_score = np.array([second['objective'], -second['runtime']])
            if np.all(first_score >= second_score):  # j is dominated by i
                penalties[i, j] = -calc_syntax_match([first['code']], second['code'], 'python')
            elif np.all(second_score >= first_score):  # i is dominated by j
                penalties[j, i] = -calc_syntax_match([second['code']], first['code'], 'python')

    # Column sums collect, per individual, the penalties from everyone dominating it.
    total_penalty = penalties.sum(axis=0)
    exp_penalty = np.exp(total_penalty)
    return exp_penalty / exp_penalty.sum()  # essentially softmax
