/*
 * Decompiled with CFR 0.152.
 */
package it.units.inginf.male.terminalsets;

import it.units.inginf.male.configuration.Configuration;
import it.units.inginf.male.inputs.Context;
import it.units.inginf.male.inputs.DataSet;
import it.units.inginf.male.terminalsets.TerminalSetBuilder;
import it.units.inginf.male.tree.Constant;
import it.units.inginf.male.tree.Leaf;
import it.units.inginf.male.tree.NodeFactory;
import it.units.inginf.male.utils.BasicTokenizer;
import it.units.inginf.male.utils.Tokenizer;
import it.units.inginf.male.utils.Utils;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.TreeSet;

/**
 * {@link TerminalSetBuilder} that extends the node factory's terminal set with
 * constants derived from the tokens of a training dataset.
 *
 * <p>Tokens are produced by a {@link BasicTokenizer}. A token becomes a terminal
 * when the percentage of matches (or of positive examples, for unmatched text)
 * that contain it reaches a configurable threshold.
 */
public class TokenizedContextTerminalSetBuilder
implements TerminalSetBuilder {
    private static final Tokenizer tokenizer = new BasicTokenizer();

    /** Default percentage threshold used for both matched and unmatched tokens. */
    private static final double DEFAULT_TOKEN_THRESHOLD = 80.0;

    /**
     * Tells whether the token is exactly one character long and that character
     * is alphabetic, a digit, or an underscore.
     */
    private static boolean matchW(String string) {
        return string.length() == 1 && TokenizedContextTerminalSetBuilder.matchW(string.charAt(0));
    }

    /** Tells whether the character is alphabetic, a digit, or an underscore. */
    private static boolean matchW(char character) {
        return Character.isAlphabetic(character) || Character.isDigit(character) || character == '_';
    }

    /**
     * Builds the terminal set using the training dataset held by the
     * configuration's dataset container.
     *
     * @param configuration configuration providing the dataset, the builder
     *                      parameters and the node factory to update
     */
    @Override
    public void setup(Configuration configuration) {
        this.setup(configuration, configuration.getDatasetContainer().getTrainingDataset());
    }

    /**
     * Rebuilds the terminal set of the configuration's node factory.
     *
     * <p>The resulting set contains: the terminals already present in the node
     * factory, one escaped constant per winner token (matched and unmatched),
     * the regex ranges generated from every character seen in matched strings,
     * and the constants {@code \d} and {@code \w}.
     *
     * <p>Recognised population-builder parameters (all optional):
     * {@code tokenThreashold} and {@code tokenUnmatchThreashold} (percentages,
     * default 80.0) and {@code discardWtokens} (boolean, default {@code true}).
     *
     * @param configuration   configuration providing parameters and the node factory
     * @param trainingDataset dataset to analyse; when it exposes a non-null
     *                        striped dataset, that one is analysed instead
     * @throws NumberFormatException if a threshold parameter is not a valid double
     */
    public void setup(Configuration configuration, DataSet trainingDataset) {
        double tokenThreshold = DEFAULT_TOKEN_THRESHOLD;
        double tokenUnmatchThreshold = DEFAULT_TOKEN_THRESHOLD;
        boolean discardWTokens = true;
        if (trainingDataset.getStripedDataset() != null) {
            trainingDataset = trainingDataset.getStripedDataset();
        }
        Map<String, String> parameters = configuration.getPopulationBuilderParameters();
        if (parameters != null) {
            if (parameters.containsKey("tokenThreashold")) {
                tokenThreshold = Double.parseDouble(parameters.get("tokenThreashold"));
            }
            if (parameters.containsKey("discardWtokens")) {
                discardWTokens = Boolean.parseBoolean(parameters.get("discardWtokens"));
            }
            if (parameters.containsKey("tokenUnmatchThreashold")) {
                tokenUnmatchThreshold = Double.parseDouble(parameters.get("tokenUnmatchThreashold"));
            }
        }

        TreeSet<Character> charset = new TreeSet<>();
        NodeFactory nodeFactory = configuration.getNodeFactory();
        // Work on a copy so the factory's set is replaced only once the new set is complete.
        HashSet<Leaf> terminalSet = new HashSet<>(nodeFactory.getTerminalSet());

        // Collect every character that appears in any matched string.
        for (DataSet.Example example : trainingDataset.getExamples()) {
            for (String matched : example.getMatchedStrings()) {
                for (char c : matched.toCharArray()) {
                    charset.add(c);
                }
            }
        }

        Map<String, Double> winnerTokens = TokenizedContextTerminalSetBuilder.calculateWinnerMatchTokens(trainingDataset, tokenThreshold, discardWTokens);
        Map<String, Double> winnerUnmatchTokens = TokenizedContextTerminalSetBuilder.calculateWinnerUnmatchTokens(trainingDataset, tokenUnmatchThreshold, discardWTokens);
        winnerTokens.putAll(winnerUnmatchTokens);

        // Only the token text is needed here; the percentages are not used.
        for (String token : winnerTokens.keySet()) {
            terminalSet.add(new Constant(Utils.escape(token)));
        }
        terminalSet.addAll(Utils.generateRegexRanges(charset));
        terminalSet.add(new Constant("\\d"));
        terminalSet.add(new Constant("\\w"));

        nodeFactory.getTerminalSet().clear();
        nodeFactory.getTerminalSet().addAll(terminalSet);
    }

    /**
     * Finds the tokens that occur in at least {@code threashold} percent of the
     * dataset's matches.
     *
     * <p>Each matched string is tokenized and every distinct token is counted
     * once per matched string. The count is turned into a percentage of
     * {@code dataSet.getNumberMatches()}.
     *
     * @param dataSet        dataset whose matched strings are analysed
     * @param threashold     minimum percentage (inclusive) for a token to be kept
     * @param discardWtokens when {@code true}, single-character tokens made of a
     *                       letter, a digit or an underscore are ignored
     * @return a new mutable map from winner token to its percentage
     */
    public static Map<String, Double> calculateWinnerMatchTokens(DataSet dataSet, double threashold, boolean discardWtokens) {
        Map<String, Double> tokensCounter = new HashMap<>();
        for (DataSet.Example example : dataSet.getExamples()) {
            for (String match : example.getMatchedStrings()) {
                List<String> tokens = tokenizer.tokenize(match);
                // De-duplicate so a token counts at most once per matched string.
                countTokens(new HashSet<>(tokens), tokensCounter, discardWtokens);
            }
        }
        int numberOfMatches = dataSet.getNumberMatches();
        return selectWinners(tokensCounter, numberOfMatches, threashold);
    }

    /**
     * Finds the tokens that occur in the unmatched text of at least
     * {@code threashold} percent of the positive examples.
     *
     * <p>Examples whose match list is empty are skipped. For every other example
     * the tokens of all its unmatched strings are pooled, and every distinct
     * token is counted once per example.
     *
     * @param dataSet        dataset whose unmatched strings are analysed
     * @param threashold     minimum percentage (inclusive) for a token to be kept
     * @param discardWtokens when {@code true}, single-character tokens made of a
     *                       letter, a digit or an underscore are ignored
     * @return a new mutable map from winner token to its percentage
     */
    public static Map<String, Double> calculateWinnerUnmatchTokens(DataSet dataSet, double threashold, boolean discardWtokens) {
        Map<String, Double> tokensCounter = new HashMap<>();
        int numberOfPositiveExamples = 0;
        for (DataSet.Example example : dataSet.getExamples()) {
            if (example.getMatch().isEmpty()) {
                continue;
            }
            ++numberOfPositiveExamples;
            HashSet<String> exampleTokenSet = new HashSet<>();
            for (String unmatch : example.getUnmatchedStrings()) {
                exampleTokenSet.addAll(tokenizer.tokenize(unmatch));
            }
            countTokens(exampleTokenSet, tokensCounter, discardWtokens);
        }
        return selectWinners(tokensCounter, numberOfPositiveExamples, threashold);
    }

    /**
     * Initialises the node factory of the context's configuration and then
     * builds the terminal set from the context's current dataset.
     *
     * @param context context providing the configuration and the current dataset
     */
    @Override
    public void setup(Context context) {
        context.getConfiguration().initNodeFactory();
        this.setup(context.getConfiguration(), context.getCurrentDataSet());
    }

    /** Adds one to the counter of every given token, optionally skipping single-character word tokens. */
    private static void countTokens(Iterable<String> distinctTokens, Map<String, Double> tokensCounter, boolean discardWtokens) {
        for (String token : distinctTokens) {
            if (TokenizedContextTerminalSetBuilder.matchW(token) && discardWtokens) {
                continue;
            }
            Double current = tokensCounter.get(token);
            tokensCounter.put(token, current == null ? 1.0 : current + 1.0);
        }
    }

    /** Converts counts to percentages of {@code total} and keeps the tokens at or above the threshold. */
    private static Map<String, Double> selectWinners(Map<String, Double> tokensCounter, int total, double threashold) {
        Map<String, Double> winners = new HashMap<>();
        for (Map.Entry<String, Double> entry : tokensCounter.entrySet()) {
            double percentage = entry.getValue() * 100.0 / (double) total;
            if (percentage >= threashold) {
                winners.put(entry.getKey(), percentage);
            }
        }
        return winners;
    }
}

