/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
package com.seclust;

//import com.baga.util.SplitNormalizeToken;
import java.io.BufferedOutputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * Builds a term-frequency (TF) matrix for the documents that
 * {@link DBOperator} returns for a query and writes it to a text file.
 *
 * <p>Rows are the distinct terms found in the document contents, columns are
 * the documents (one per URL). Every row is scaled to unit Euclidean length,
 * i.e. the normalisation is done per term across documents. All of the work,
 * including the database access and the file write, happens in the
 * constructor.
 *
 * <p>NOTE(review): terms are de-duplicated case-sensitively but counted
 * case-insensitively, so two terms that differ only in letter case end up as
 * two rows with identical values.
 *
 * @author baga
 */
public class WeightTF {

    /** Folder handed to MATLAB through its {@code -sd} start-up option. */
    private static final String MATLAB_START_DIR = "D:\\nova";

    /** Stemmed form of the query; also used in the generated file names. */
    private final String useQuery;
    private final DBOperator dbOpr;
    private final Stemmer stem;
    private final String[] contents;
    private final String[] urls;
    private final String[] allTerm;
    /**
     * Lower-cased token -> number of occurrences over all counted documents,
     * in first-seen order. Filled only in prep mode; not exposed by any
     * accessor at the moment.
     */
    private final Map<String, Integer> termFrequency = new LinkedHashMap<String, Integer>();
    private final boolean isPrep;
    /** Stop-word list exactly as returned by {@link DBOperator#getStopWords()}. */
    private final ArrayList stopWords;
    private final double[][] matrix;
    /** Table name used in file names; carries a {@code _prep} suffix in prep mode. */
    private final String tableUse;

    /**
     * Loads the documents for {@code query}, builds the normalised TF matrix
     * and writes it to {@code A_<stemmed query>_<table>[_prep].txt}.
     *
     * @param query  search query; it is stemmed for file names and lower-cased
     *               for the database lookup
     * @param table  source table; {@code "tbta_pa"} selects the {@code lingo}
     *               database, anything else {@code db_secluts}
     * @param isPrep when {@code true} the contents are tokenised with
     *               {@code SplitNormalizeToken.getNormalTokensStemmer},
     *               otherwise they are split on single spaces
     */
    public WeightTF(String query, String table, boolean isPrep) {
        this.isPrep = isPrep;
        tableUse = isPrep ? table + "_prep" : table;

        String dbName = table.equals("tbta_pa") ? "lingo" : "db_secluts";
        // NOTE(review): database credentials are hard-coded here.
        dbOpr = new DBOperator("root", "root", dbName);

        stem = new Stemmer();
        stem.add(query.toCharArray(), query.length());
        stem.stem();
        useQuery = stem.toString();

        // The lookup uses the raw (unstemmed) query and the table name
        // without the "_prep" suffix.
        dbOpr.getDocBaseIndex(query.toLowerCase(), table);
        contents = dbOpr.getContents();
        System.out.println("banyak kolom " + contents.length);
        urls = dbOpr.getUrls();
        stopWords = dbOpr.getStopWords();

        allTerm = getAllTerms();
        matrix = buildNormalizedTfMatrix();
        writeToFile(matrix);
    }

    /**
     * @return the stemmed query string
     */
    public String getTokens() {
        return useQuery;
    }

    /**
     * @return the row-normalised TF matrix, indexed {@code [term][document]};
     *         this is the internal array, not a copy
     */
    public double[][] getTFMatrix() {
        return matrix;
    }

    /**
     * Launches MATLAB to run {@code cobaM2} on the matrix file and waits for
     * the launched process to exit. Nothing is started when the matrix file
     * is missing or the {@code VDS_..._k.txt} result file already exists.
     *
     * <p>Both files are looked up relative to the JVM working directory while
     * MATLAB receives only the bare file name; presumably the JVM is expected
     * to run from the MATLAB start folder (TODO confirm).
     *
     * @param k value forwarded as the second argument of {@code cobaM2}
     */
    public void runMatlan(int k) {
        File aFile = new File("A_" + useQuery + "_" + tableUse + ".txt");
        String fileName = "VDS_" + useQuery + "_" + tableUse + "_" + k + ".txt";
        File fileVDS = new File(fileName);
        if (!aFile.exists() || fileVDS.exists()) {
            return;
        }

        System.out.println("harusnya masuk sini");
        // Query and table text end up inside MATLAB single-quoted literals,
        // so embedded apostrophes must be doubled to keep the call valid.
        String expression = "cobaM2('" + matlabEscape(aFile.getName()) + "'," + k + ", '"
                + matlabEscape(useQuery) + "', '" + matlabEscape(tableUse) + "')";
        String[] cmd = {"matlab", "-sd", MATLAB_START_DIR, "-r", expression};
        try {
            Process proc = Runtime.getRuntime().exec(cmd);
            proc.waitFor();
        } catch (InterruptedException ex) {
            // Keep the interruption visible to the caller's thread.
            Thread.currentThread().interrupt();
        } catch (IOException ex) {
            ex.printStackTrace();
        }
    }

    /**
     * @return the document URLs as returned by the database layer
     */
    public String[] getUrls() {
        return urls;
    }

    /**
     * @return the document contents as returned by the database layer
     */
    public String[] getContents() {
        return contents;
    }

    /**
     * @return the stop-word list loaded from the database; the token
     *         frequencies gathered while building the matrix are not used here
     */
    public ArrayList getStopWords() {
        return stopWords;
    }

    /**
     * @return the distinct terms, in first-seen order; index {@code i}
     *         corresponds to row {@code i} of {@link #getTFMatrix()}
     */
    public String[] getTerms() {
        return allTerm;
    }

    /** Splits one document into tokens according to the prep mode. */
    private String[] tokenize(String content) {
        return isPrep
                ? SplitNormalizeToken.getNormalTokensStemmer(content)
                : content.split(" ");
    }

    /** Collects the distinct non-blank tokens of all contents, first-seen order. */
    private String[] getAllTerms() {
        Set<String> distinct = new LinkedHashSet<String>();
        for (String content : contents) {
            for (String token : tokenize(content)) {
                if (!token.trim().isEmpty()) {
                    distinct.add(token);
                }
            }
        }
        return distinct.toArray(new String[distinct.size()]);
    }

    /**
     * Counts every term in every document (case-insensitively) and scales
     * each term row to unit Euclidean length. Each document is tokenised
     * exactly once.
     */
    private double[][] buildNormalizedTfMatrix() {
        int termCount = allTerm.length;
        int docCount = urls.length;
        double[][] tf = new double[termCount][docCount];
        if (termCount == 0) {
            return tf;
        }

        List<Map<String, Integer>> countsPerDoc =
                new ArrayList<Map<String, Integer>>(docCount);
        for (int j = 0; j < docCount; j++) {
            countsPerDoc.add(countTokens(contents[j]));
        }

        for (int i = 0; i < termCount; i++) {
            String term = allTerm[i].toLowerCase();
            double sumOfSquares = 0;
            for (int j = 0; j < docCount; j++) {
                Integer boxed = countsPerDoc.get(j).get(term);
                if (boxed != null) {
                    int count = boxed.intValue();
                    tf[i][j] = count; // plain TF, no IDF weighting
                    sumOfSquares += Math.pow(count, 2);
                }
            }
            // A term absent from every counted document keeps an all-zero
            // row instead of being turned into NaN by a division by zero.
            if (sumOfSquares > 0) {
                double scale = 1 / Math.sqrt(sumOfSquares);
                for (int j = 0; j < docCount; j++) {
                    tf[i][j] *= scale;
                }
            }
        }
        return tf;
    }

    /**
     * Returns lower-cased token -> occurrence count for one document and, in
     * prep mode, adds the same occurrences to {@link #termFrequency}.
     */
    private Map<String, Integer> countTokens(String content) {
        Map<String, Integer> counts = new HashMap<String, Integer>();
        for (String token : tokenize(content)) {
            if (token.trim().isEmpty()) {
                continue;
            }
            String key = token.toLowerCase();
            increment(counts, key);
            if (isPrep) {
                increment(termFrequency, key);
            }
        }
        return counts;
    }

    /** Adds one to the counter stored under {@code key}, starting at 1. */
    private static void increment(Map<String, Integer> counts, String key) {
        Integer current = counts.get(key);
        counts.put(key, current == null ? 1 : current + 1);
    }

    /** Doubles apostrophes so the text is safe inside a MATLAB '...' literal. */
    private static String matlabEscape(String text) {
        return text.replace("'", "''");
    }

    /**
     * Writes the matrix as text, one row per line, values separated (and
     * followed) by a single space. I/O failures are reported on stderr; the
     * stream is closed even when a write fails.
     */
    private void writeToFile(double data[][]) {
        if (data == null) {
            return;
        }
        DataOutputStream dos = null;
        try {
            dos = new DataOutputStream(new BufferedOutputStream(
                    new FileOutputStream("A_" + useQuery + "_" + tableUse + ".txt")));
            StringBuilder row = new StringBuilder();
            for (int i = 0; i < data.length; i++) {
                row.setLength(0);
                for (int j = 0; j < data[i].length; j++) {
                    row.append(data[i][j]).append(' ');
                }
                row.append('\n');
                dos.write(row.toString().getBytes());
            }
        } catch (IOException ie) {
            ie.printStackTrace();
        } finally {
            if (dos != null) {
                try {
                    dos.close();
                } catch (IOException ie) {
                    ie.printStackTrace();
                }
            }
        }
    }

    /**
     * Manual smoke run: prints the contents and the term rows of the matrix
     * for the query "life" on table "t_clust_full" in prep mode.
     */
    public static void main(String[] args) {
        WeightTF wf = new WeightTF("life", "t_clust_full", true);
        double[][] matrix = wf.getTFMatrix();
        String[] contents = wf.getContents();
        for (int i = 0; i < contents.length; i++) {
            System.out.println(contents[i]);
        }
        String[] term = wf.getTerms();
        for (int i = 0; i < matrix.length; i++) {
            System.out.print(term[i] + " ");
            for (int j = 0; j < matrix[i].length; j++) {
                System.out.print(matrix[i][j] + " ");
            }
            System.out.println();
        }
    }
}
