/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
package com.seclust;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.util.regex.Pattern;

/**
 * Walks a directory tree, splits every file it finds on the
 * {@code <!--m-->} marker comment and, for each span lying between two
 * consecutive markers, extracts the first link found in that span and hands
 * the pieces to {@link DBOperator#insTable}.
 *
 * @author baga
 */
public class ReadContent {

    /** Marker comment that delimits the records inside a scanned file. */
    private static final String MARKER = "<!--m-->";

    /** Root directory whose files are scanned. */
    private final File pathName;

    /** Receives one {@code insTable} call per extracted record. */
    private final DBOperator dbOpt;

    /** Text of the file most recently loaded by {@link #getPoint(File)}. */
    private StringBuffer strBuf;

    /**
     * Creates a reader rooted at the given directory.
     *
     * @param pathName path of the directory to scan
     */
    public ReadContent(String pathName) {
        this.pathName = new File(pathName);

        dbOpt = new DBOperator();
    }

    /**
     * Creates a reader rooted at the original author's default data directory.
     */
    public ReadContent() {
        this("/export/home/baga/Documents/Book/TA/data gugel");
    }

    /**
     * Loads {@code filePath} into {@link #strBuf} and locates every marker.
     *
     * @param filePath file to read
     * @return the offsets (into {@code strBuf}) of the character immediately
     *         following each marker, in ascending order; empty when the file
     *         cannot be read or contains no marker
     */
    private int[] getPoint(File filePath) {
        strBuf = new StringBuffer();

        String content = readWholeFile(filePath);
        if (content == null) {
            return new int[0];
        }
        strBuf.append(content);

        // Positions are collected in a manually grown array so that files
        // with any number of markers are handled.
        int[] found = new int[16];
        int count = 0;
        int from = 0;
        int hit;
        while ((hit = strBuf.indexOf(MARKER, from)) != -1) {
            int afterMarker = hit + MARKER.length();
            if (count == found.length) {
                int[] bigger = new int[found.length * 2];
                System.arraycopy(found, 0, bigger, 0, count);
                found = bigger;
            }
            found[count++] = afterMarker;
            from = afterMarker;
        }

        int[] exact = new int[count];
        System.arraycopy(found, 0, exact, 0, count);
        return exact;
    }

    /**
     * Reads the complete file and decodes it once with the platform default
     * charset (decoding in one go avoids splitting a multibyte character
     * across read chunks).
     *
     * @param filePath file to read
     * @return the decoded text, or {@code null} when the file is missing or
     *         an I/O error occurs (the problem is reported on stderr)
     */
    private String readWholeFile(File filePath) {
        FileInputStream in = null;
        try {
            in = new FileInputStream(filePath);
            byte[] data = new byte[8192];
            int filled = 0;
            int got;
            // read() returning -1 is the only reliable end-of-file signal.
            while ((got = in.read(data, filled, data.length - filled)) != -1) {
                filled += got;
                if (filled == data.length) {
                    byte[] bigger = new byte[data.length * 2];
                    System.arraycopy(data, 0, bigger, 0, filled);
                    data = bigger;
                }
            }
            return new String(data, 0, filled);
        } catch (FileNotFoundException fe) {
            System.err.println("file not found: " + filePath + " (" + fe + ")");
            return null;
        } catch (IOException ie) {
            System.err.println("cannot read: " + filePath + " (" + ie + ")");
            return null;
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing a read-only stream fails.
                }
            }
        }
    }

    /**
     * Recursively visits {@code dirPath}: sub-directories are printed and
     * descended into, every other entry is passed to
     * {@link #extractFile(File)}.
     *
     * @param dirPath directory to traverse
     */
    private void getDirList(File dirPath) {
        File[] dir = dirPath.listFiles();
        if (dir == null) {
            // listFiles() yields null when the path is not a directory or
            // cannot be read.
            System.err.println("cannot list directory: " + dirPath.getAbsolutePath());
            return;
        }

        for (int i = 0; i < dir.length; i++) {
            if (dir[i].isDirectory()) {
                System.out.println(dir[i].getAbsolutePath());
                getDirList(dir[i]);
            } else {
                this.extractFile(dir[i]);
            }
        }
    }

    /**
     * Returns the absolute path of {@code path}.
     */
    private String getFile(File path) {
        return path.getAbsolutePath();
    }

    /**
     * Removes everything between {@code '<'} and the next {@code '>'}
     * (both inclusive). An unterminated {@code '<'} discards the rest of the
     * input.
     *
     * @param theString text that may contain markup
     * @return the text with tags removed
     */
    private String stripAllTag(String theString) {
        StringBuffer kept = new StringBuffer();

        int i = 0;
        while (i < theString.length()) {
            char c = theString.charAt(i);
            if (c == '<') {
                int close = theString.indexOf('>', i);
                if (close == -1) {
                    break;
                }
                i = close + 1;
            } else {
                kept.append(c);
                i++;
            }
        }

        return kept.toString();
    }

    /**
     * Returns the last component of a path string.
     *
     * @param header a path, e.g. the parent directory of a scanned file
     * @return the final path element, or an empty string when there is none
     */
    private String getHeader(String header) {
        if (header == null) {
            return "";
        }
        return new File(header).getName();
    }

    /**
     * Case-insensitive variant of {@link String#indexOf(String, int)}.
     *
     * @return the first offset at or after {@code from} where {@code needle}
     *         occurs ignoring case, or -1
     */
    private static int indexOfIgnoreCase(String haystack, String needle, int from) {
        int last = haystack.length() - needle.length();
        for (int i = Math.max(from, 0); i <= last; i++) {
            if (haystack.regionMatches(true, i, needle, 0, needle.length())) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Extracts the first link from a record.
     *
     * @param sbBuf text of one record
     * @return a three element array, or {@code null} when the record has no
     *         {@code href} or is not shaped like
     *         {@code href="..." ...>text</a>}:
     *         <ul>
     *         <li>[0] the raw markup between the first {@code '>'} after
     *             {@code href} and the first {@code </a>}</li>
     *         <li>[1] the lower-cased text between the first pair of double
     *             quotes after {@code href}</li>
     *         <li>[2] everything from {@code </a>} onward with tags removed</li>
     *         </ul>
     */
    private String[] getUrlDest(String sbBuf) {
        int hrefAt = indexOfIgnoreCase(sbBuf, "href", 0);
        if (hrefAt == -1) {
            return null;
        }

        int openQuote = sbBuf.indexOf('"', hrefAt);
        int closeQuote = (openQuote == -1) ? -1 : sbBuf.indexOf('"', openQuote + 1);
        if (closeQuote == -1) {
            return null;
        }

        int tagEnd = sbBuf.indexOf('>', hrefAt);
        int anchorEnd = indexOfIgnoreCase(sbBuf, "</a>", hrefAt);
        if (tagEnd == -1 || anchorEnd == -1 || tagEnd + 1 > anchorEnd) {
            return null;
        }

        String[] url = new String[3];
        url[0] = sbBuf.substring(tagEnd + 1, anchorEnd);
        // NOTE(review): the URL is lower-cased as before; this alters
        // case-sensitive paths/queries - confirm that it is intended.
        url[1] = sbBuf.substring(openQuote + 1, closeQuote).toLowerCase();
        url[2] = this.stripAllTag(sbBuf.substring(anchorEnd));
        return url;
    }

    /**
     * Scans the whole tree below the configured root directory.
     */
    public void extractInf() {
        this.getDirList(this.pathName);
    }

    /**
     * Processes one file: each span from just after one marker to just after
     * the next is parsed with {@link #getUrlDest(String)} and, when a link is
     * found, stored via {@code dbOpt.insTable} together with the name of the
     * file's parent directory. Text after the last marker is not processed.
     *
     * @param path file to process
     */
    private void extractFile(File path) {
        System.out.println("processing file :" + path.getAbsolutePath());

        int[] pntNya = this.getPoint(path);
        String title = this.getHeader(path.getParent());
        System.out.println("Header : " + title);

        for (int j = 0; j + 1 < pntNya.length; j++) {
            String nilaiSebenarnya = this.strBuf.substring(pntNya[j], pntNya[j + 1]) + "\n";

            String[] restUrl = this.getUrlDest(nilaiSebenarnya);
            if (restUrl == null) {
                // Span without a usable link: nothing to store.
                continue;
            }

            // inserting to database
            System.out.println("inserting to database");
            dbOpt.insTable(restUrl[0], restUrl[1], restUrl[2], title);
        }
    }

    /**
     * Entry point.
     *
     * @param args optional; {@code args[0]} overrides the default root
     *             directory
     */
    public static void main(String args[]) {
        ReadContent rc = (args != null && args.length > 0)
                ? new ReadContent(args[0])
                : new ReadContent();
        rc.extractInf();
    }
}

/**
 * {@link FilenameFilter} that accepts an entry when its bare file name
 * (any leading path removed) matches a regular expression in full.
 */
class DirFilter implements FilenameFilter {

    /** Compiled form of the expression given to the constructor. */
    private final Pattern namePattern;

    /**
     * @param regex regular expression the whole file name must match
     */
    public DirFilter(String regex) {
        this.namePattern = Pattern.compile(regex);
    }

    /**
     * @param dir  directory containing the entry (not consulted)
     * @param name name of the entry
     * @return {@code true} when the last path component of {@code name}
     *         matches the pattern entirely
     */
    public boolean accept(File dir, String name) {
        // Drop any path prefix so that only the final component is tested.
        String bareName = new File(name).getName();
        return namePattern.matcher(bareName).matches();
    }
}

