{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "075f2166",
   "metadata": {},
   "outputs": [],
   "source": [
    "## troenpy source code is at the end of the book as it is very long. Load it first."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "a1c073c3",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[nltk_data] Downloading package stopwords to\n",
      "[nltk_data]     /Users/arthur/nltk_data...\n",
      "[nltk_data]   Package stopwords is already up-to-date!\n"
     ]
    }
   ],
   "source": [
    "# %load \"packages_headers.py\"\n",
    "from sklearn.svm import SVC\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from sklearn.pipeline import make_pipeline\n",
    "\n",
    "from heapq import heapify, heappush, heappop\n",
    "\n",
    "import numpy as np\n",
    "from numpy import array\n",
    "import scipy.sparse as sp\n",
    "from sklearn.preprocessing import normalize\n",
    "from scipy.sparse import diags\n",
    "from scipy.sparse import csr_matrix\n",
    "#import scipy.sparse as sp\n",
    "import time\n",
    "t_s = time.time()\n",
    "#print(\"time passed:\"+ str(time.time()-t_s))\n",
    "import sklearn.metrics.pairwise as pdist\n",
    "from sklearn.metrics.pairwise import euclidean_distances\n",
    "import gc\n",
    "#del(variable_name)\n",
    "gc.collect()\n",
    "import random\n",
    "\n",
    "\n",
    "import requests\n",
    "requests.packages.urllib3.disable_warnings()\n",
    "import ssl\n",
    "\n",
    "try:\n",
    "    _create_unverified_https_context = ssl._create_unverified_context\n",
    "except AttributeError:\n",
    "    # Legacy Python that doesn't verify HTTPS certificates by default\n",
    "    pass\n",
    "else:\n",
    "    # Handle target environment that doesn't support HTTPS verification\n",
    "    ssl._create_default_https_context = _create_unverified_https_context\n",
    "\n",
    "\n",
    "#import gensim\n",
    "import logging\n",
    "logging.basicConfig(level=logging.INFO)\n",
    "#from gensim.models.fasttext import FastText\n",
    "#fastexmod = FastText.load_fasttext_format(fastdr)\n",
    "\n",
    "from numpy import array\n",
    "import numpy as np\n",
    "import scipy as sp\n",
    "import pandas as pd\n",
    "import csv\n",
    "import ast\n",
    "import json\n",
    "import scipy.spatial as spatial\n",
    "\n",
    "from sklearn.metrics import mean_squared_error\n",
    "from sklearn.metrics.pairwise import cosine_similarity\n",
    "from sklearn.metrics.pairwise import euclidean_distances\n",
    "import ot\n",
    "#from pyemd import emd\n",
    "from nltk.corpus import stopwords\n",
    "from collections import Counter\n",
    "import nltk\n",
    "\n",
    "import pandas as pd\n",
    "from numpy import linalg as LA\n",
    "import string\n",
    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
    "from sklearn.preprocessing import normalize\n",
    "from sklearn.metrics.pairwise import euclidean_distances\n",
    "pd.set_option('display.max_rows', 500)\n",
    "pd.set_option('display.max_columns', 500)\n",
    "pd.set_option('display.width', 1000)\n",
    "pd.set_option('display.max_colwidth', 1000)\n",
    "import csv\n",
    "import os\n",
    "nltk.download('stopwords')\n",
    "stop_words = set(stopwords.words('english'))\n",
    "len(stop_words)\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn.datasets import load_digits\n",
    "#from scipy.spatial.distance import pdist\n",
    "from sklearn.manifold._t_sne import _joint_probabilities\n",
    "from scipy import linalg\n",
    "from sklearn.metrics import pairwise_distances\n",
    "from scipy.spatial.distance import squareform\n",
    "from sklearn.manifold import TSNE\n",
    "#from matplotlib import pyplot as plt\n",
    "import seaborn as sns\n",
    "#sns.set(rc={'figure.figsize':(11.7,8.27)})\n",
    "#palette = sns.color_palette(\"bright\", 10)\n",
    "import random\n",
    "from sklearn.feature_extraction.text import CountVectorizer\n",
    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
    "from scipy.sparse import csr_matrix\n",
    "from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer\n",
    "from sklearn.decomposition import NMF, LatentDirichletAllocation\n",
    "from sklearn.feature_extraction.text import TfidfTransformer\n",
    "from sklearn.pipeline import Pipeline\n",
    "import time\n",
    "from numpy import linalg as LA\n",
    "from scipy import stats\n",
    "from collections import Counter\n",
    "from sklearn.linear_model import LogisticRegression as LR\n",
    "from sklearn.metrics import classification_report, confusion_matrix\n",
    "from sklearn import model_selection\n",
    "from sklearn import linear_model\n",
    "from sklearn import metrics\n",
    "import scipy.sparse as sp\n",
    "from sklearn import svm\n",
    "import re\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "0360e248",
   "metadata": {},
   "outputs": [],
   "source": [
    "# %load \"utility_fcns.py\"\n",
    "def getdatafromYelpFile(filenm):\n",
    "    \"\"\" this function uses the input filename\n",
    "      assume each line: label +\\t+ line\n",
    "      return list of labels, list of lines\n",
    "    \"\"\"\n",
    "    dat=[]\n",
    "    Y=[]\n",
    "    df=pd.read_csv(filenm, delimiter=',', header=None, names=['label', 'desp'])\n",
    "    for i in range(len(df)):\n",
    "        dat.append(df.desp[i])\n",
    "        Y.append(df.label[i])\n",
    "    return (dat,Y)\n",
    "\n",
    "\n",
    "\n",
    "def getdatafromAGFile(filenm):\n",
    "    \"\"\" this function uses the input filename\n",
    "      assume each line: label +\\t+ line\n",
    "      return list of labels, list of lines\n",
    "    \"\"\"\n",
    "    dat=[]\n",
    "    Y=[]\n",
    "    df=pd.read_csv(filenm, delimiter=',', header=None, names=['label','title','desp'])\n",
    "    for i in range(len(df)):\n",
    "        dat.append(df.title[i]+\" \"+df.desp[i])\n",
    "        Y.append(df.label[i])\n",
    "    return (dat,Y)\n",
    "\n",
    "\n",
    "\n",
    "## utility function for the 4 sampling datasets\n",
    "# datanmlist=[bbcfn,twfn,amazonfn,classfn]\n",
    "# def getsampledata(datanm=datanmlist[0]):\n",
    "#     print(datanm)\n",
    "#     dat,Y=getdatafromFile(datanm)\n",
    "#     #(dat,Y)=getdatafromFile(classfn)\n",
    "#     Labdic=getLabDic(Y)\n",
    "#     Y2=[Labdic[e] for e in Y]\n",
    "#     #(traindat, train_lab, testdat, test_lab, labdic)=datasplit(dat,Y2)\n",
    "#     return datasplit(dat,Y2)\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "## KNN classifier\n",
    "\n",
    "\n",
    "## from HOTT by Solomon NIPS2019   https://github.com/IBM/HOTT/blob/master/knn_classifier.py\n",
    "\n",
    "def predict(neighbor_classes, C, cross_val=False):\n",
    "    # Make sure all classes are considered\n",
    "    labels = np.concatenate((neighbor_classes, list(range(C))))\n",
    "    # Find class frequency among neighbors\n",
    "    weights = np.unique(labels, return_counts=True)[1]\n",
    "    # Find most popular class\n",
    "    prediction = np.argmax(weights)\n",
    "    nn=len(neighbor_classes)\n",
    "    # If most popular class is ambiguous try with fewer neighbors; else return\n",
    "    if sum(weights[prediction] == weights) > 1 and nn>2:\n",
    "    #if sum(weights[prediction] == weights) > 1:\n",
    "        return predict(neighbor_classes[:-2], C)\n",
    "    else:\n",
    "        return prediction\n",
    "    #return prediction\n",
    "\n",
    "\n",
    "    ### method is the actual approach(method) one specify when use the knn.\n",
    "def knn(X_train, X_test, y_train, y_test, method,sents2vec, usePos=True, n_neighbors=7):\n",
    "    # Number of classes\n",
    "    n_classes = len(np.unique(y_train))\n",
    "\n",
    "    prediction = []\n",
    "    for doc in X_test:\n",
    "        doc_to_train = [method(doc, x, sents2vec, usePos) for x in X_train]\n",
    "        # Find indices of n_neighbors closest documents\n",
    "        rank = np.argsort(doc_to_train)[:n_neighbors]\n",
    "\n",
    "        # Make prediction based on most popular class among neighbors\n",
    "        prediction.append(predict(y_train[rank], n_classes))\n",
    "\n",
    "    # Print and return test error\n",
    "    #prediction=np.asarray(prediction)\n",
    "    count=0; ntest=len(prediction)\n",
    "    for i in range(ntest):\n",
    "        if prediction[i]==y_test[i]:\n",
    "            count=count+1\n",
    "    test_error = 1 - count/ntest\n",
    "    return test_error\n",
    "\n",
    "\n",
    "    ### \n",
    "def knn2(X_train, X_test, y_train, y_test, method, dist, usePos=False, n_neighbors=7):\n",
    "    # Number of classes\n",
    "    n_classes = len(np.unique(y_train))\n",
    "\n",
    "    prediction = []\n",
    "    for i in range(len(y_test)):\n",
    "        #doc_to_train = [method(doc, x, sents2vec, usePos) for x in X_train]\n",
    "        doc_to_train=dist[i,:]\n",
    "        # Find indices of n_neighbors closest documents\n",
    "        rank = np.argsort(doc_to_train)[:n_neighbors]\n",
    "\n",
    "        # Make prediction based on most popular class among neighbors\n",
    "        prediction.append(predict(y_train[rank], n_classes))\n",
    "\n",
    "    # Print and return test error\n",
    "    #prediction=np.asarray(prediction)\n",
    "    count=0; ntest=len(prediction)\n",
    "    for i in range(ntest):\n",
    "        if prediction[i]==y_test[i]:\n",
    "            count=count+1\n",
    "    test_error = 1 - count/ntest\n",
    "    return test_error\n",
    "\n",
    "\n",
    "    ### y: labels\n",
    "def knnClassify(y_train, y_test, dist, n_neighbors=7):\n",
    "    # Number of classes\n",
    "    n_classes = len(np.unique(y_train))\n",
    "\n",
    "    prediction = []\n",
    "    for i in range(len(y_test)):\n",
    "        doc_to_train=dist[i,:]\n",
    "        # Find indices of n_neighbors closest documents\n",
    "        rank = np.argsort(doc_to_train)[:n_neighbors]\n",
    "\n",
    "        # Make prediction based on most popular class among neighbors\n",
    "        prediction.append(predict(y_train[rank], n_classes))\n",
    "\n",
    "    # Print and return test error\n",
    "    #prediction=np.asarray(prediction)\n",
    "    count=0; ntest=len(prediction)\n",
    "    for i in range(ntest):\n",
    "        if prediction[i]==y_test[i]:\n",
    "            count=count+1\n",
    "    test_error = 1 - count/ntest\n",
    "    return test_error\n",
    "\n",
    "def getOptimalK(labs, dist, cv=5):\n",
    "    \"\"\"input: lab of samples and dist matrix of pairwise dist, crossvalidation =5 by default\n",
    "       output: return the optimal k of knn from {1,...,19}, also print the optimal error rates\n",
    "    \"\"\"\n",
    "    ## sample size\n",
    "    n=dist.shape[0]\n",
    "    foldsz=n//cv\n",
    "    \n",
    "    opt_k=1\n",
    "    min_err_rate = 1\n",
    "    for k in range(1, 20):\n",
    "        k_err_rate = getErrorShuffle(labs, dist, k, foldsz, cv)\n",
    "        if k_err_rate < min_err_rate:\n",
    "            min_err_rate = k_err_rate\n",
    "            opt_k = k\n",
    "    print(\"k {:2d} | err:{:3f}\".format(opt_k, min_err_rate))\n",
    "    return opt_k\n",
    "\n",
    "def getError(labs, dist, nbk=7, Foldsz=50, CV=5):\n",
    "    n=dist.shape[0]\n",
    "    # Number of classes\n",
    "    n_classes = len(np.unique(labs))\n",
    "    errs=[]\n",
    "    for cv in range(0,5):\n",
    "        testidxst=cv*Foldsz\n",
    "        testidxend=min(n,testidxst+Foldsz)\n",
    "        testidx=list(range(testidxst,testidxend))\n",
    "        trainidx=list(range(0,n))\n",
    "        for idx in testidx:\n",
    "            trainidx.remove(idx)\n",
    "        trainidx=np.asarray(trainidx)\n",
    "        testidx=np.asarray(testidx)\n",
    "        train_labs=labs[trainidx]\n",
    "        test_labs=labs[testidx]\n",
    "        ###\n",
    "        prediction = []\n",
    "        for i in testidx:\n",
    "            doc_to_train=dist[i,trainidx]\n",
    "            # Find indices of n_neighbors closest documents\n",
    "            rank = np.argsort(doc_to_train)[:nbk]\n",
    "            # Make prediction based on most popular class among neighbors\n",
    "            prediction.append(predict(train_labs[rank], n_classes))\n",
    "\n",
    "        prediction=np.asarray(prediction)\n",
    "        count=0; ntest=len(prediction)\n",
    "        for i in range(ntest):\n",
    "            if prediction[i]==test_labs[i]:\n",
    "                count=count+1\n",
    "        test_error = 1 - count/ntest\n",
    "        errs.append(test_error)\n",
    "    print(errs)\n",
    "    return np.mean(errs)\n",
    "\n",
    "#import random\n",
    "def getErrorShuffle(labs, dist, nbk=7, Foldsz=50, CV=5):\n",
    "    n=dist.shape[0]\n",
    "    # Number of classes\n",
    "    n_classes = len(np.unique(labs))\n",
    "    errs=[]\n",
    "    n=dist.shape[0]\n",
    "    transfidx=list(range(0,n))\n",
    "    random.shuffle(transfidx)\n",
    "    transfidx=np.asarray(transfidx)\n",
    "    \n",
    "    for cv in range(0,5):\n",
    "        testidxst=cv*Foldsz\n",
    "        testidxend=min(n,testidxst+Foldsz)\n",
    "        faketestidx=list(range(testidxst,testidxend))\n",
    "        faketestidx=np.asarray(faketestidx)\n",
    "        testidx=transfidx[faketestidx]\n",
    "        trainidx=list(range(0,n))\n",
    "        for idx in testidx:\n",
    "            trainidx.remove(idx)\n",
    "        trainidx=np.asarray(trainidx)\n",
    "        testidx=np.asarray(testidx)\n",
    "        train_labs=labs[trainidx]\n",
    "        test_labs=labs[testidx]\n",
    "        ###\n",
    "        prediction = []\n",
    "        for i in testidx:\n",
    "            doc_to_train=dist[i,trainidx]\n",
    "            # Find indices of n_neighbors closest documents\n",
    "            rank = np.argsort(doc_to_train)[:nbk]\n",
    "            # Make prediction based on most popular class among neighbors\n",
    "            prediction.append(predict(train_labs[rank], n_classes, cross_val=True))\n",
    "\n",
    "        prediction=np.asarray(prediction)\n",
    "        count=0; ntest=len(prediction)\n",
    "        for i in range(ntest):\n",
    "            if prediction[i]==test_labs[i]:\n",
    "                count=count+1\n",
    "        test_error = 1 - count/ntest\n",
    "        errs.append(test_error)\n",
    "    #print(errs)\n",
    "    return np.mean(errs)\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "def getLabDic(lables):\n",
    "    \"\"\"input a list of lables\n",
    "    return a dictionary mapping labels to 0,1,2,3...n-1\n",
    "    \"\"\"\n",
    "    Labs=list(set(lables))\n",
    "    n=len(Labs)\n",
    "    labdic={}\n",
    "    for i in range(n):\n",
    "        labdic[Labs[i]]=i\n",
    "    for k,v in labdic.items():\n",
    "        print(str(k)+\"--->\"+str(v))\n",
    "    return labdic\n",
    "\n",
    "def getdatafromFile(filenm):\n",
    "    \"\"\" this function uses the input filename\n",
    "      assume each line: label +\\t+ line\n",
    "      return list of labels, list of lines\n",
    "    \"\"\"\n",
    "    dat=[]\n",
    "    Y=[]\n",
    "    openf=open(filenm, 'r', encoding=\"latin1\")\n",
    "    for i, ln in enumerate(openf):\n",
    "        ln2=ln.split('\\t')\n",
    "        Y.append(ln2[0])\n",
    "        dat.append(ln2[1])\n",
    "    openf.close()\n",
    "    return (dat,Y)\n",
    "\n",
    "## split tran/test for bbc\n",
    "from random import sample\n",
    "\n",
    "def datasplit(data,labels):\n",
    "    \"\"\"80/20 split for train/test\"\"\"\n",
    "    nsample=len(data)\n",
    "    print(nsample)\n",
    "    allidx=list(range(0,nsample,1))\n",
    "    random.shuffle(allidx)\n",
    "    testidx=sample(allidx,int(nsample*0.2))\n",
    "    print(len(testidx))\n",
    "    trainidx=[i for i in allidx if i not in testidx]\n",
    "    print(len(trainidx))\n",
    "\n",
    "    labs=list(set(labels))\n",
    "    labdic={}\n",
    "    for i,l in enumerate(labs):\n",
    "        labdic[l]=i\n",
    "    for k,v in labdic.items():\n",
    "        print(str(k)+\"--->\"+str(v))\n",
    "       \n",
    "    train=[data[i] for i in trainidx]\n",
    "    train_lab=[labdic[labels[i]] for i in trainidx]\n",
    "    test=[data[i] for i in testidx]\n",
    "    test_lab=[labdic[labels[i]] for i in testidx]\n",
    "\n",
    "    test_lab=np.asarray(test_lab)\n",
    "    train_lab=np.asarray(train_lab)\n",
    "    return (train, train_lab, test, test_lab, labdic)\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "59a3615b",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "399af693",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "2242e3d9",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "cwd = os.getcwd()\n",
    "datadr=cwd\n",
    "SW = set()\n",
    "for line in open(datadr+'/stop_words.txt'):\n",
    "    line = line.strip()\n",
    "    if line != '':\n",
    "        SW.add(line)\n",
    "stop = list(SW)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d8a040f3",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "9eb0b229",
   "metadata": {},
   "outputs": [],
   "source": [
    "### all amazon lines\n",
    "\n",
    "amazonfn=datadr+'/all_amazon_by_line.txt'\n",
    "classfn=datadr+'/all_classic.txt'\n",
    "twfn=datadr+'/all_twitter_by_line.txt'\n",
    "bbcfn=datadr+'/all_bbcsport_by_line.txt'\n",
    "\n",
    "test_r8fn=datadr+'/r8-test-all-terms.txt'\n",
    "train_r8fn=datadr+'/r8-train-all-terms.txt'\n",
    "test_20ngfn=datadr+'/20ng-test-all-terms.txt'\n",
    "train_20ngfn=datadr+'/20ng-train-all-terms.txt'\n",
    "### all classic lines\n",
    "\n",
    "train_ohfn=datadr+'/train_ohsumed_by_line.txt'\n",
    "test_ohfn=datadr+'/test_ohsumed_by_line.txt'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "af871b77",
   "metadata": {},
   "outputs": [],
   "source": [
    "## utility function for the 4 sampling datasets\n",
    "datanmlist=[bbcfn,twfn,amazonfn,classfn]\n",
    "def getsampledata(datanm=datanmlist[0]):\n",
    "    print(datanm)\n",
    "    dat,Y=getdatafromFile(datanm)\n",
    "    #(dat,Y)=getdatafromFile(classfn)\n",
    "    Labdic=getLabDic(Y)\n",
    "    Y2=[Labdic[e] for e in Y]\n",
    "    #(traindat, train_lab, testdat, test_lab, labdic)=datasplit(dat,Y2)\n",
    "    return datasplit(dat,Y2)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6c85114f",
   "metadata": {},
   "outputs": [],
   "source": [
    "#09-30-2024 \n",
    "def repeatSampling_knn(filenm, Nsample=50):\n",
    "    idf=[]\n",
    "    it=[]## t: troenpy, pcf , pi\n",
    "    ncf10=[] ## idf*entropy, e1-basee\n",
    "    rncf10=[] ## reciprocal of entropy\n",
    "    nege=[]\n",
    "    for i in range(Nsample):\n",
    "        ## get method idferr\n",
    "        print(f\" ... {i} sampling ....\")\n",
    "        traindat, train_lab, testdat, test_lab, labdic=getsampledata(filenm)\n",
    "        ##zcf z=t-e, 0.3574\n",
    "        e=troenpy();\n",
    "        e.train(traindat, train_lab, \"KNN\", tunec=100, usencf=True, useidf=False, useBTF=False, usewt4BTF=False, usetf=True, usebiasfea=False, usebiaswt=False, stopwordlist=stop_words)\n",
    "        error=e.evaluate(testdat, test_lab, predModel=\"KNN\")\n",
    "        ncf10.append(error)\n",
    "        \n",
    "        e=troenpy();\n",
    "        e.train(traindat, train_lab, \"KNN\", tunec=100, userncf=True, useidf=False, useBTF=False, usewt4BTF=False, usetf=True, usebiasfea=False, usebiaswt=False, stopwordlist=stop_words)\n",
    "        error=e.evaluate(testdat, test_lab, predModel=\"KNN\")\n",
    "        rncf10.append(error)\n",
    "        \n",
    "        \n",
    "        e=troenpy();\n",
    "        e.train(traindat, train_lab, \"KNN\", tunec=100, usenege=True, useidf=False, useBTF=False, usewt4BTF=False, usetf=True, usebiasfea=False, usebiaswt=False, stopwordlist=stop_words)\n",
    "        error=e.evaluate(testdat, test_lab, predModel=\"KNN\")\n",
    "        nege.append(error)\n",
    "        \n",
    "\n",
    "        ## pcf 11: ng \n",
    "        e=troenpy();\n",
    "        e.train(traindat, train_lab, \"KNN\", tunec=100, usepcf=True, useidf=False, useBTF=False, usewt4BTF=False, usetf=True, usebiasfea=False, usebiaswt=False, stopwordlist=stop_words)\n",
    "        error=e.evaluate(testdat, test_lab, predModel=\"KNN\")\n",
    "        it.append(error)\n",
    "\n",
    "        #idf\n",
    "        e=troenpy();\n",
    "        e.train(traindat, train_lab, \"KNN\", tunec=100, useidf=True, useBTF=False, usewt4BTF=False, usetf=True, usebiasfea=False, usebiaswt=False, stopwordlist=stop_words)\n",
    "        error=e.evaluate(testdat, test_lab, predModel=\"KNN\")\n",
    "        idf.append(error)\n",
    "            \n",
    "        \n",
    "    #res=[]\n",
    "    #res.append((np.mean(idferr), np.std(idferr)))\n",
    "    #result={'nege':nege}\n",
    "    #result={'rncf':rncf10}\n",
    "    result={\"idf\":idf, 'it':it,  'ncf':ncf10, 'nege':nege,  'rncf':rncf10}\n",
    "    df=pd.DataFrame(result)\n",
    "    return df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "308af369",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b311b9b6",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "8a1504db",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "earn--->0\n",
      "grain--->1\n",
      "ship--->2\n",
      "acq--->3\n",
      "trade--->4\n",
      "crude--->5\n",
      "money-fx--->6\n",
      "interest--->7\n"
     ]
    }
   ],
   "source": [
    "(traindat,trainY)=getdatafromFile(train_r8fn)\n",
    "(testdat, testY)=getdatafromFile(test_r8fn)\n",
    "r8Labdic=getLabDic(trainY)\n",
    "train_lab=[r8Labdic[e] for e in trainY]\n",
    "test_lab=[r8Labdic[e] for e in testY]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "5545ca4c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "5485\n",
      "2189\n"
     ]
    }
   ],
   "source": [
    "print(len(train_lab))\n",
    "print(len(test_lab))\n",
    "train_lab=np.asarray(train_lab)\n",
    "test_lab=np.asarray(test_lab)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "91b7020f",
   "metadata": {},
   "outputs": [],
   "source": [
    "r8LabdicRev={}\n",
    "for k,v in r8Labdic.items():\n",
    "    r8LabdicRev[v]=k"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "3047f09a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(['trade', 'grain', 'ship', 'acq', 'earn'], array([0, 3, 0, 0, 0]))"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "testY[0:5], train_lab[0:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "81676df9",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7cb99ca7",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "bfa6e925",
   "metadata": {},
   "outputs": [],
   "source": [
    "#print(train_20ngfn)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "516a99ec",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "comp.windows.x--->0\n",
      "sci.electronics--->1\n",
      "sci.crypt--->2\n",
      "comp.sys.ibm.pc.hardware--->3\n",
      "comp.sys.mac.hardware--->4\n",
      "rec.sport.baseball--->5\n",
      "misc.forsale--->6\n",
      "sci.med--->7\n",
      "rec.motorcycles--->8\n",
      "rec.sport.hockey--->9\n",
      "talk.politics.guns--->10\n",
      "talk.religion.misc--->11\n",
      "alt.atheism--->12\n",
      "comp.graphics--->13\n",
      "sci.space--->14\n",
      "soc.religion.christian--->15\n",
      "rec.autos--->16\n",
      "talk.politics.misc--->17\n",
      "comp.os.ms-windows.misc--->18\n",
      "talk.politics.mideast--->19\n",
      "11293\n",
      "7528\n"
     ]
    }
   ],
   "source": [
    "(traindat,trainY)=getdatafromFile(train_20ngfn)\n",
    "(testdat, testY)=getdatafromFile(test_20ngfn)\n",
    "t20ngLabdic = getLabDic(trainY)\n",
    "train_lab=[t20ngLabdic[e] for e in trainY]\n",
    "test_lab =[t20ngLabdic[e] for e in testY]\n",
    "train_lab=np.asarray(train_lab)\n",
    "test_lab=np.asarray(test_lab)\n",
    "print(len(train_lab))\n",
    "print(len(test_lab))\n",
    "t20ngLabdicRev={}\n",
    "for k,v in t20ngLabdic.items():\n",
    "    t20ngLabdicRev[v]=k"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "3ed5cc46",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "class10--->0\n",
      "class7--->1\n",
      "class4--->2\n",
      "class5--->3\n",
      "class2--->4\n",
      "class6--->5\n",
      "class9--->6\n",
      "class1--->7\n",
      "class3--->8\n",
      "class8--->9\n",
      "3999\n",
      "5153\n"
     ]
    }
   ],
   "source": [
    "#train_ohfn='train_ohsumed_by_line.txt'\n",
    "#test_ohfn='test_ohsumed_by_line.txt'\n",
    "\n",
    "(traindat,trainY)=getdatafromFile(train_ohfn)\n",
    "(testdat, testY)=getdatafromFile(test_ohfn)\n",
    "ohLabdic = getLabDic(trainY)\n",
    "train_lab=[ohLabdic[e] for e in trainY]\n",
    "test_lab =[ohLabdic[e] for e in testY]\n",
    "train_lab=np.asarray(train_lab)\n",
    "test_lab=np.asarray(test_lab)\n",
    "print(len(train_lab))\n",
    "print(len(test_lab))\n",
    "ohLabdicRev={}\n",
    "for k,v in ohLabdic.items():\n",
    "    ohLabdicRev[v]=k\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "e5641e7f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dict_keys(['class10', 'class7', 'class4', 'class5', 'class2', 'class6', 'class9', 'class1', 'class3', 'class8'])"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ohLabdic.keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "accab218",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "b2825c85",
   "metadata": {},
   "outputs": [],
   "source": [
    "bbcfn=datadr+'/all_bbcsport_by_line.txt'\n",
    "#print(bbcfn)\n",
    "#print(twfn)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "d1095a48",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "length of train: 5678\n",
      "length of test: 1419\n"
     ]
    }
   ],
   "source": [
    "(dat,Y)=getdatafromFile(bbcfn)\n",
    "#(dat,Y)=getdatafromFile(twfn)\n",
    "#(dat,Y)=getdatafromFile(amazonfn)\n",
    "(dat,Y)=getdatafromFile(classfn)\n",
    "Labdic=getLabDic(Y)\n",
    "Y2=[Labdic[e] for e in Y]\n",
    "(traindat, train_lab, testdat, test_lab, labdic)=datasplit(dat,Y2)\n",
    "#fastwv=sents2fastwvpc(train+test, remPC=False, mpc=5, d=300)\n",
    "#fastwvpc=sents2fastwvpc(train+test, remPC=True, mpc=5, d=300)\n",
    "#idfdic=getidfdic(test,train)\n",
    "print(\"length of train: \"+str(len(train_lab)))\n",
    "print(\"length of test: \"+str(len(test_lab)))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "744215b3",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "97183cbe",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "b6cfe58e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5485 docs.\n",
      "shape of dfs:(19822,)\n",
      "len of dfs: 19822\n",
      "the vocabulary size =:19822\n",
      "time passed:2236.8952629566193\n",
      "DC0:base entropy=-1.335287313108601, DC0:basete troenpy=-0.4848154539769855\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =19822\n",
      "fle max=-1.584871106918059\n",
      "normalizing bias features ...\n",
      "number of ebias features =19822\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "0.14253083599817273\n"
     ]
    }
   ],
   "source": [
    "## 09-30-2024 negentropy\n",
    "e2=troenpy();\n",
    "e2.train(traindat, train_lab, \"KNN\", tunec=100, usenege=True, useidf=False, useBTF=False, usewt4BTF=False, usetf=True, usebiasfea=False, usebiaswt=False, stopwordlist=stop_words)\n",
    "err=e2.evaluate(testdat, test_lab, predModel=\"KNN\")\n",
    "print(err)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5b766fa4",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c82a215d",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "89d18ce5",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 3999 docs.\n",
      "shape of dfs:(22052,)\n",
      "len of dfs: 22052\n",
      "the vocabulary size =:22052\n",
      "time passed:2336.1879601478577\n",
      "DC0:base entropy=-2.000497425390672, DC0:basete troenpy=-0.18462356525225312\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =22052\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "0.3854065592858529\n"
     ]
    }
   ],
   "source": [
    "#idf\n",
    "e=troenpy();\n",
    "e.train(traindat, train_lab, \"KNN\", tunec=100, useidf=True, useBTF=False, usewt4BTF=False, usetf=True, usebiasfea=False, usebiaswt=False, stopwordlist=stop_words)\n",
    "error=e.evaluate(testdat, test_lab, predModel=\"KNN\")\n",
    "#idf.append(error)\n",
    "print(error)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "50f720d6",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "2b913da5",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 3999 docs.\n",
      "shape of dfs:(22052,)\n",
      "len of dfs: 22052\n",
      "the vocabulary size =:22052\n",
      "time passed:2363.649204015732\n",
      "DC0:base entropy=-2.000497425390672, DC0:basete troenpy=-0.18462356525225312\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22052\n",
      "fle max=4.064309147672834\n",
      "normalizing bias features ...\n",
      "number of ebias features =22052\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "0.43101106151756263\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 3999 docs.\n",
      "shape of dfs:(22052,)\n",
      "len of dfs: 22052\n",
      "the vocabulary size =:22052\n",
      "time passed:2390.562346935272\n",
      "DC0:base entropy=-2.000497425390672, DC0:basete troenpy=-0.18462356525225312\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22052\n",
      "fle max=3.5858209469144087\n",
      "normalizing bias features ...\n",
      "number of ebias features =22052\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "0.3399961187657675\n"
     ]
    }
   ],
   "source": [
    "## 1/entropy, \n",
    "e2=troenpy();\n",
    "e2.train(traindat, train_lab, \"KNN\", tunec=100, usencf=True, useidf=False, useBTF=False, usewt4BTF=False, usetf=True, usebiasfea=False, usebiaswt=False, stopwordlist=stop_words)\n",
    "err=e2.evaluate(testdat, test_lab, predModel=\"KNN\")\n",
    "#incf10.append(error)\n",
    "print(err)\n",
    "\n",
    "## pcf 11: ng \n",
    "e=troenpy();\n",
    "e.train(traindat, train_lab, \"KNN\", tunec=100, usepcf=True, useidf=False, useBTF=False, usewt4BTF=False, usetf=True, usebiasfea=False, usebiaswt=False, stopwordlist=stop_words)\n",
    "error=e.evaluate(testdat, test_lab, predModel=\"KNN\")\n",
    "print(error)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b4fc6068",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bb30d3b1",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6637b908",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "64495819",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " ... 0 sampling ....\n",
      "/Users/arthur/work/PN_manus2023/PN/manus/iclr2024/iclr2025/data/all_bbcsport_by_line.txt\n",
      "class_rugby--->0\n",
      "class_football--->1\n",
      "class_athletics--->2\n",
      "class_tennis--->3\n",
      "class_cricket--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11938,)\n",
      "len of dfs: 11938\n",
      "the vocabulary size =:11938\n",
      "time passed:2437.2479932308197\n",
      "DC0:base entropy=-1.5327681419112928, DC0:basete troenpy=-0.26680351480968445\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11938\n",
      "fle max=3.0957298086708156\n",
      "normalizing bias features ...\n",
      "number of ebias features =11938\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11938,)\n",
      "len of dfs: 11938\n",
      "the vocabulary size =:11938\n",
      "time passed:2441.4197821617126\n",
      "DC0:base entropy=-1.5327681419112928, DC0:basete troenpy=-0.26680351480968445\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11938\n",
      "fle max=10000.652456962243\n",
      "normalizing bias features ...\n",
      "number of ebias features =11938\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11938,)\n",
      "len of dfs: 11938\n",
      "the vocabulary size =:11938\n",
      "time passed:2445.43709897995\n",
      "DC0:base entropy=-1.5327681419112928, DC0:basete troenpy=-0.26680351480968445\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11938\n",
      "fle max=-2.0309218037064074\n",
      "normalizing bias features ...\n",
      "number of ebias features =11938\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11938,)\n",
      "len of dfs: 11938\n",
      "the vocabulary size =:11938\n",
      "time passed:2449.447389125824\n",
      "DC0:base entropy=-1.5327681419112928, DC0:basete troenpy=-0.26680351480968445\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11938\n",
      "fle max=4.500910019406944\n",
      "normalizing bias features ...\n",
      "number of ebias features =11938\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11938,)\n",
      "len of dfs: 11938\n",
      "the vocabulary size =:11938\n",
      "time passed:2453.431442975998\n",
      "DC0:base entropy=-1.5327681419112928, DC0:basete troenpy=-0.26680351480968445\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =11938\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 1 sampling ....\n",
      "/Users/arthur/work/PN_manus2023/PN/manus/iclr2024/iclr2025/data/all_bbcsport_by_line.txt\n",
      "class_rugby--->0\n",
      "class_football--->1\n",
      "class_athletics--->2\n",
      "class_tennis--->3\n",
      "class_cricket--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12036,)\n",
      "len of dfs: 12036\n",
      "the vocabulary size =:12036\n",
      "time passed:2457.2746500968933\n",
      "DC0:base entropy=-1.5273632526706906, DC0:basete troenpy=-0.2704265160100944\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12036\n",
      "fle max=3.0972969714570344\n",
      "normalizing bias features ...\n",
      "number of ebias features =12036\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12036,)\n",
      "len of dfs: 12036\n",
      "the vocabulary size =:12036\n",
      "time passed:2461.324345111847\n",
      "DC0:base entropy=-1.5273632526706906, DC0:basete troenpy=-0.2704265160100944\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12036\n",
      "fle max=10000.654765966674\n",
      "normalizing bias features ...\n",
      "number of ebias features =12036\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12036,)\n",
      "len of dfs: 12036\n",
      "the vocabulary size =:12036\n",
      "time passed:2465.406085252762\n",
      "DC0:base entropy=-1.5273632526706906, DC0:basete troenpy=-0.2704265160100944\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12036\n",
      "fle max=-1.994119017077347\n",
      "normalizing bias features ...\n",
      "number of ebias features =12036\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12036,)\n",
      "len of dfs: 12036\n",
      "the vocabulary size =:12036\n",
      "time passed:2469.469412088394\n",
      "DC0:base entropy=-1.5273632526706906, DC0:basete troenpy=-0.2704265160100944\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12036\n",
      "fle max=4.504533020607354\n",
      "normalizing bias features ...\n",
      "number of ebias features =12036\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12036,)\n",
      "len of dfs: 12036\n",
      "the vocabulary size =:12036\n",
      "time passed:2473.478222131729\n",
      "DC0:base entropy=-1.5273632526706906, DC0:basete troenpy=-0.2704265160100944\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =12036\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 2 sampling ....\n",
      "/Users/arthur/work/PN_manus2023/PN/manus/iclr2024/iclr2025/data/all_bbcsport_by_line.txt\n",
      "class_rugby--->0\n",
      "class_football--->1\n",
      "class_athletics--->2\n",
      "class_tennis--->3\n",
      "class_cricket--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12092,)\n",
      "len of dfs: 12092\n",
      "the vocabulary size =:12092\n",
      "time passed:2477.4334831237793\n",
      "DC0:base entropy=-1.5140149265317844, DC0:basete troenpy=-0.28019216262085184\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12092\n",
      "fle max=3.0838963186247987\n",
      "normalizing bias features ...\n",
      "number of ebias features =12092\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12092,)\n",
      "len of dfs: 12092\n",
      "the vocabulary size =:12092\n",
      "time passed:2481.3212871551514\n",
      "DC0:base entropy=-1.5140149265317844, DC0:basete troenpy=-0.28019216262085184\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12092\n",
      "fle max=10000.660539097988\n",
      "normalizing bias features ...\n",
      "number of ebias features =12092\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12092,)\n",
      "len of dfs: 12092\n",
      "the vocabulary size =:12092\n",
      "time passed:2485.2584612369537\n",
      "DC0:base entropy=-1.5140149265317844, DC0:basete troenpy=-0.28019216262085184\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12092\n",
      "fle max=-2.0158440645708\n",
      "normalizing bias features ...\n",
      "number of ebias features =12092\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12092,)\n",
      "len of dfs: 12092\n",
      "the vocabulary size =:12092\n",
      "time passed:2489.268040895462\n",
      "DC0:base entropy=-1.5140149265317844, DC0:basete troenpy=-0.28019216262085184\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =12092\n",
      "fle max=4.584257255825022\n",
      "normalizing bias features ...\n",
      "number of ebias features =12092\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12092,)\n",
      "len of dfs: 12092\n",
      "the vocabulary size =:12092\n",
      "time passed:2493.1963171958923\n",
      "DC0:base entropy=-1.5140149265317844, DC0:basete troenpy=-0.28019216262085184\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =12092\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "(0.013605442176870763, 0.0)\n"
     ]
    }
   ],
   "source": [
    "dfbbc3=repeatSampling_knn(bbcfn, Nsample=3)\n",
    "df=dfbbc3\n",
    "print((np.mean(df.nege), np.std(df.nege)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e22a71f5",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "52423116",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "4b7d6816",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " ... 0 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11922,)\n",
      "len of dfs: 11922\n",
      "the vocabulary size =:11922\n",
      "time passed:22018.52162718773\n",
      "DC0:base entropy=-1.512712254947843, DC0:basete troenpy=-0.28174077620041615\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11922\n",
      "fle max=10000.661107958585\n",
      "normalizing bias features ...\n",
      "number of ebias features =11922\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 1 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12020,)\n",
      "len of dfs: 12020\n",
      "the vocabulary size =:12020\n",
      "time passed:22022.744752168655\n",
      "DC0:base entropy=-1.5265980820399996, DC0:basete troenpy=-0.27180561870587044\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12020\n",
      "fle max=10000.655094173891\n",
      "normalizing bias features ...\n",
      "number of ebias features =12020\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 2 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11955,)\n",
      "len of dfs: 11955\n",
      "the vocabulary size =:11955\n",
      "time passed:22026.821676015854\n",
      "DC0:base entropy=-1.5372405879542093, DC0:basete troenpy=-0.2635541634797151\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11955\n",
      "fle max=10000.650558581197\n",
      "normalizing bias features ...\n",
      "number of ebias features =11955\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 3 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11767,)\n",
      "len of dfs: 11767\n",
      "the vocabulary size =:11767\n",
      "time passed:22031.642578125\n",
      "DC0:base entropy=-1.5291004945229514, DC0:basete troenpy=-0.26932619960267096\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11767\n",
      "fle max=10000.654022025226\n",
      "normalizing bias features ...\n",
      "number of ebias features =11767\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 4 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11932,)\n",
      "len of dfs: 11932\n",
      "the vocabulary size =:11932\n",
      "time passed:22036.550050020218\n",
      "DC0:base entropy=-1.500306103959147, DC0:basete troenpy=-0.28990833108594066\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11932\n",
      "fle max=10000.666575077492\n",
      "normalizing bias features ...\n",
      "number of ebias features =11932\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 5 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11759,)\n",
      "len of dfs: 11759\n",
      "the vocabulary size =:11759\n",
      "time passed:22041.6140999794\n",
      "DC0:base entropy=-1.5233986662138201, DC0:basete troenpy=-0.2735310650361612\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11759\n",
      "fle max=10000.65647008179\n",
      "normalizing bias features ...\n",
      "number of ebias features =11759\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 6 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11789,)\n",
      "len of dfs: 11789\n",
      "the vocabulary size =:11789\n",
      "time passed:22046.16865992546\n",
      "DC0:base entropy=-1.5393276638406017, DC0:basete troenpy=-0.26223679025911695\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11789\n",
      "fle max=10000.649676473138\n",
      "normalizing bias features ...\n",
      "number of ebias features =11789\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 7 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11906,)\n",
      "len of dfs: 11906\n",
      "the vocabulary size =:11906\n",
      "time passed:22050.89493703842\n",
      "DC0:base entropy=-1.5396975273567708, DC0:basete troenpy=-0.2616935476503195\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11906\n",
      "fle max=10000.649520398825\n",
      "normalizing bias features ...\n",
      "number of ebias features =11906\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 8 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11767,)\n",
      "len of dfs: 11767\n",
      "the vocabulary size =:11767\n",
      "time passed:22055.87531685829\n",
      "DC0:base entropy=-1.527094942969849, DC0:basete troenpy=-0.2710702711417102\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11767\n",
      "fle max=10000.65488101621\n",
      "normalizing bias features ...\n",
      "number of ebias features =11767\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 9 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11935,)\n",
      "len of dfs: 11935\n",
      "the vocabulary size =:11935\n",
      "time passed:22060.58412003517\n",
      "DC0:base entropy=-1.518628142892419, DC0:basete troenpy=-0.27762282136202865\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11935\n",
      "fle max=10000.658532411586\n",
      "normalizing bias features ...\n",
      "number of ebias features =11935\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 10 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11782,)\n",
      "len of dfs: 11782\n",
      "the vocabulary size =:11782\n",
      "time passed:22064.6766769886\n",
      "DC0:base entropy=-1.526731038815949, DC0:basete troenpy=-0.27025717433908475\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =11782\n",
      "fle max=10000.655037120676\n",
      "normalizing bias features ...\n",
      "number of ebias features =11782\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 11 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11852,)\n",
      "len of dfs: 11852\n",
      "the vocabulary size =:11852\n",
      "time passed:22068.63214612007\n",
      "DC0:base entropy=-1.5297182398203835, DC0:basete troenpy=-0.26948285760159224\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11852\n",
      "fle max=10000.653757894595\n",
      "normalizing bias features ...\n",
      "number of ebias features =11852\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 12 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11965,)\n",
      "len of dfs: 11965\n",
      "the vocabulary size =:11965\n",
      "time passed:22072.522130012512\n",
      "DC0:base entropy=-1.5176134694835899, DC0:basete troenpy=-0.2790462450679025\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11965\n",
      "fle max=10000.658972734087\n",
      "normalizing bias features ...\n",
      "number of ebias features =11965\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 13 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12006,)\n",
      "len of dfs: 12006\n",
      "the vocabulary size =:12006\n",
      "time passed:22076.483676195145\n",
      "DC0:base entropy=-1.5102499118927524, DC0:basete troenpy=-0.28333825214554115\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12006\n",
      "fle max=10000.662185914209\n",
      "normalizing bias features ...\n",
      "number of ebias features =12006\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 14 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11996,)\n",
      "len of dfs: 11996\n",
      "the vocabulary size =:11996\n",
      "time passed:22080.356882095337\n",
      "DC0:base entropy=-1.5276114966163479, DC0:basete troenpy=-0.26947337670052146\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11996\n",
      "fle max=10000.654659557205\n",
      "normalizing bias features ...\n",
      "number of ebias features =11996\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 15 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11836,)\n",
      "len of dfs: 11836\n",
      "the vocabulary size =:11836\n",
      "time passed:22084.287133216858\n",
      "DC0:base entropy=-1.5295260065804743, DC0:basete troenpy=-0.2700284550528695\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11836\n",
      "fle max=10000.653840065292\n",
      "normalizing bias features ...\n",
      "number of ebias features =11836\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 16 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11991,)\n",
      "len of dfs: 11991\n",
      "the vocabulary size =:11991\n",
      "time passed:22088.1465818882\n",
      "DC0:base entropy=-1.530951609429659, DC0:basete troenpy=-0.2674728410630166\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11991\n",
      "fle max=10000.653231177888\n",
      "normalizing bias features ...\n",
      "number of ebias features =11991\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 17 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12023,)\n",
      "len of dfs: 12023\n",
      "the vocabulary size =:12023\n",
      "time passed:22092.026247024536\n",
      "DC0:base entropy=-1.531182699170881, DC0:basete troenpy=-0.26875725011145807\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12023\n",
      "fle max=10000.653132584244\n",
      "normalizing bias features ...\n",
      "number of ebias features =12023\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 18 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11867,)\n",
      "len of dfs: 11867\n",
      "the vocabulary size =:11867\n",
      "time passed:22095.876552820206\n",
      "DC0:base entropy=-1.5215370532661823, DC0:basete troenpy=-0.2749899118053747\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11867\n",
      "fle max=10000.65727333106\n",
      "normalizing bias features ...\n",
      "number of ebias features =11867\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 19 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11950,)\n",
      "len of dfs: 11950\n",
      "the vocabulary size =:11950\n",
      "time passed:22099.686972141266\n",
      "DC0:base entropy=-1.5278921562539995, DC0:basete troenpy=-0.2697193327738283\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11950\n",
      "fle max=10000.654539294437\n",
      "normalizing bias features ...\n",
      "number of ebias features =11950\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 20 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11805,)\n",
      "len of dfs: 11805\n",
      "the vocabulary size =:11805\n",
      "time passed:22103.507630109787\n",
      "DC0:base entropy=-1.5310089371292968, DC0:basete troenpy=-0.26673571505934085\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11805\n",
      "fle max=10000.653206716446\n",
      "normalizing bias features ...\n",
      "number of ebias features =11805\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 21 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12015,)\n",
      "len of dfs: 12015\n",
      "the vocabulary size =:12015\n",
      "time passed:22107.333084106445\n",
      "DC0:base entropy=-1.5083505683342204, DC0:basete troenpy=-0.2845614042240567\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =12015\n",
      "fle max=10000.66301980652\n",
      "normalizing bias features ...\n",
      "number of ebias features =12015\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 22 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11815,)\n",
      "len of dfs: 11815\n",
      "the vocabulary size =:11815\n",
      "time passed:22111.14772796631\n",
      "DC0:base entropy=-1.5239074077992132, DC0:basete troenpy=-0.27335938730606385\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11815\n",
      "fle max=10000.65625091129\n",
      "normalizing bias features ...\n",
      "number of ebias features =11815\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 23 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11797,)\n",
      "len of dfs: 11797\n",
      "the vocabulary size =:11797\n",
      "time passed:22114.934891939163\n",
      "DC0:base entropy=-1.521436937448282, DC0:basete troenpy=-0.27508545989970373\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11797\n",
      "fle max=10000.657316584764\n",
      "normalizing bias features ...\n",
      "number of ebias features =11797\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 24 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11728,)\n",
      "len of dfs: 11728\n",
      "the vocabulary size =:11728\n",
      "time passed:22118.694966077805\n",
      "DC0:base entropy=-1.5243235477120716, DC0:basete troenpy=-0.27265989264798607\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11728\n",
      "fle max=10000.656071743217\n",
      "normalizing bias features ...\n",
      "number of ebias features =11728\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 25 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11923,)\n",
      "len of dfs: 11923\n",
      "the vocabulary size =:11923\n",
      "time passed:22122.406720876694\n",
      "DC0:base entropy=-1.520848560525115, DC0:basete troenpy=-0.2761424245650441\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11923\n",
      "fle max=10000.657570900252\n",
      "normalizing bias features ...\n",
      "number of ebias features =11923\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 26 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11943,)\n",
      "len of dfs: 11943\n",
      "the vocabulary size =:11943\n",
      "time passed:22126.176937818527\n",
      "DC0:base entropy=-1.5129928888636754, DC0:basete troenpy=-0.2801320794528026\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11943\n",
      "fle max=10000.66098532643\n",
      "normalizing bias features ...\n",
      "number of ebias features =11943\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 27 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12013,)\n",
      "len of dfs: 12013\n",
      "the vocabulary size =:12013\n",
      "time passed:22129.971458911896\n",
      "DC0:base entropy=-1.5281963051327858, DC0:basete troenpy=-0.2715074471001441\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12013\n",
      "fle max=10000.654409016395\n",
      "normalizing bias features ...\n",
      "number of ebias features =12013\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 28 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11959,)\n",
      "len of dfs: 11959\n",
      "the vocabulary size =:11959\n",
      "time passed:22133.799667835236\n",
      "DC0:base entropy=-1.5255454731656817, DC0:basete troenpy=-0.27175465181309555\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11959\n",
      "fle max=10000.655546210986\n",
      "normalizing bias features ...\n",
      "number of ebias features =11959\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 29 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11868,)\n",
      "len of dfs: 11868\n",
      "the vocabulary size =:11868\n",
      "time passed:22137.579210996628\n",
      "DC0:base entropy=-1.5330048411164174, DC0:basete troenpy=-0.26590267275757173\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11868\n",
      "fle max=10000.65235621493\n",
      "normalizing bias features ...\n",
      "number of ebias features =11868\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 30 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11830,)\n",
      "len of dfs: 11830\n",
      "the vocabulary size =:11830\n",
      "time passed:22141.291431188583\n",
      "DC0:base entropy=-1.5357917598941027, DC0:basete troenpy=-0.26355279484445765\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11830\n",
      "fle max=10000.651172342077\n",
      "normalizing bias features ...\n",
      "number of ebias features =11830\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 31 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11821,)\n",
      "len of dfs: 11821\n",
      "the vocabulary size =:11821\n",
      "time passed:22145.03897213936\n",
      "DC0:base entropy=-1.5308806773409698, DC0:basete troenpy=-0.26859365340564284\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11821\n",
      "fle max=10000.65326144679\n",
      "normalizing bias features ...\n",
      "number of ebias features =11821\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 32 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11936,)\n",
      "len of dfs: 11936\n",
      "the vocabulary size =:11936\n",
      "time passed:22148.76914191246\n",
      "DC0:base entropy=-1.5150720296333622, DC0:basete troenpy=-0.2781186609446991\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =11936\n",
      "fle max=10000.660078193154\n",
      "normalizing bias features ...\n",
      "number of ebias features =11936\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 33 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11935,)\n",
      "len of dfs: 11935\n",
      "the vocabulary size =:11935\n",
      "time passed:22152.54452395439\n",
      "DC0:base entropy=-1.5126172091604204, DC0:basete troenpy=-0.28281354704147055\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11935\n",
      "fle max=10000.661149502263\n",
      "normalizing bias features ...\n",
      "number of ebias features =11935\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 34 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11902,)\n",
      "len of dfs: 11902\n",
      "the vocabulary size =:11902\n",
      "time passed:22156.289197921753\n",
      "DC0:base entropy=-1.5276374971264381, DC0:basete troenpy=-0.2703127330416479\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11902\n",
      "fle max=10000.654648414118\n",
      "normalizing bias features ...\n",
      "number of ebias features =11902\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 35 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12022,)\n",
      "len of dfs: 12022\n",
      "the vocabulary size =:12022\n",
      "time passed:22160.0479221344\n",
      "DC0:base entropy=-1.5261912452271216, DC0:basete troenpy=-0.2715042749057456\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12022\n",
      "fle max=10000.655268813793\n",
      "normalizing bias features ...\n",
      "number of ebias features =12022\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 36 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11849,)\n",
      "len of dfs: 11849\n",
      "the vocabulary size =:11849\n",
      "time passed:22163.88007593155\n",
      "DC0:base entropy=-1.5102267273983676, DC0:basete troenpy=-0.28122246128323364\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11849\n",
      "fle max=10000.662196080539\n",
      "normalizing bias features ...\n",
      "number of ebias features =11849\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 37 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11782,)\n",
      "len of dfs: 11782\n",
      "the vocabulary size =:11782\n",
      "time passed:22167.617141008377\n",
      "DC0:base entropy=-1.5264042904060169, DC0:basete troenpy=-0.27183001720793043\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11782\n",
      "fle max=10000.655177349816\n",
      "normalizing bias features ...\n",
      "number of ebias features =11782\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 38 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11889,)\n",
      "len of dfs: 11889\n",
      "the vocabulary size =:11889\n",
      "time passed:22171.378111839294\n",
      "DC0:base entropy=-1.531710044053121, DC0:basete troenpy=-0.2673863324962902\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11889\n",
      "fle max=10000.652907705771\n",
      "normalizing bias features ...\n",
      "number of ebias features =11889\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 39 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11900,)\n",
      "len of dfs: 11900\n",
      "the vocabulary size =:11900\n",
      "time passed:22175.230235099792\n",
      "DC0:base entropy=-1.5138028489973847, DC0:basete troenpy=-0.28122339063503676\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11900\n",
      "fle max=10000.660631642902\n",
      "normalizing bias features ...\n",
      "number of ebias features =11900\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 40 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11972,)\n",
      "len of dfs: 11972\n",
      "the vocabulary size =:11972\n",
      "time passed:22179.007395029068\n",
      "DC0:base entropy=-1.5155211838557716, DC0:basete troenpy=-0.2789222901184922\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11972\n",
      "fle max=10000.659882553216\n",
      "normalizing bias features ...\n",
      "number of ebias features =11972\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 41 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11976,)\n",
      "len of dfs: 11976\n",
      "the vocabulary size =:11976\n",
      "time passed:22182.899589061737\n",
      "DC0:base entropy=-1.5193197133134764, DC0:basete troenpy=-0.27745598931201065\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11976\n",
      "fle max=10000.658232638267\n",
      "normalizing bias features ...\n",
      "number of ebias features =11976\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 42 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11847,)\n",
      "len of dfs: 11847\n",
      "the vocabulary size =:11847\n",
      "time passed:22186.65552997589\n",
      "DC0:base entropy=-1.5256242158202298, DC0:basete troenpy=-0.27168861753994755\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11847\n",
      "fle max=10000.655512373798\n",
      "normalizing bias features ...\n",
      "number of ebias features =11847\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 43 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11951,)\n",
      "len of dfs: 11951\n",
      "the vocabulary size =:11951\n",
      "time passed:22190.351284980774\n",
      "DC0:base entropy=-1.5385783739448726, DC0:basete troenpy=-0.2633991650536043\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =11951\n",
      "fle max=10000.649992887085\n",
      "normalizing bias features ...\n",
      "number of ebias features =11951\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 44 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11866,)\n",
      "len of dfs: 11866\n",
      "the vocabulary size =:11866\n",
      "time passed:22194.145487070084\n",
      "DC0:base entropy=-1.5314800665159267, DC0:basete troenpy=-0.26818854336062997\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11866\n",
      "fle max=10000.653005757267\n",
      "normalizing bias features ...\n",
      "number of ebias features =11866\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 45 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11785,)\n",
      "len of dfs: 11785\n",
      "the vocabulary size =:11785\n",
      "time passed:22197.885598897934\n",
      "DC0:base entropy=-1.5186748023772985, DC0:basete troenpy=-0.2781195485459652\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11785\n",
      "fle max=10000.658512177624\n",
      "normalizing bias features ...\n",
      "number of ebias features =11785\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 46 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12062,)\n",
      "len of dfs: 12062\n",
      "the vocabulary size =:12062\n",
      "time passed:22201.58194708824\n",
      "DC0:base entropy=-1.5232381313621468, DC0:basete troenpy=-0.2722537151494481\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12062\n",
      "fle max=10000.656539272053\n",
      "normalizing bias features ...\n",
      "number of ebias features =12062\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 47 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11794,)\n",
      "len of dfs: 11794\n",
      "the vocabulary size =:11794\n",
      "time passed:22205.409406900406\n",
      "DC0:base entropy=-1.5159853067292968, DC0:basete troenpy=-0.2803452825356186\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11794\n",
      "fle max=10000.659680515117\n",
      "normalizing bias features ...\n",
      "number of ebias features =11794\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 48 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11889,)\n",
      "len of dfs: 11889\n",
      "the vocabulary size =:11889\n",
      "time passed:22209.14547085762\n",
      "DC0:base entropy=-1.5299266231147328, DC0:basete troenpy=-0.26978485908435046\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11889\n",
      "fle max=10000.653668843835\n",
      "normalizing bias features ...\n",
      "number of ebias features =11889\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 49 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11929,)\n",
      "len of dfs: 11929\n",
      "the vocabulary size =:11929\n",
      "time passed:22212.899564027786\n",
      "DC0:base entropy=-1.5269509941886383, DC0:basete troenpy=-0.26988262275901836\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11929\n",
      "fle max=10000.65494275722\n",
      "normalizing bias features ...\n",
      "number of ebias features =11929\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n"
     ]
    },
    {
     "ename": "AttributeError",
     "evalue": "'DataFrame' object has no attribute 'nege'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
      "\u001b[0;32m/var/folders/pg/88t62z716tx4bc3mxmsldm5r0000gn/T/ipykernel_25331/4232698689.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0mdfbbc3\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mrepeatSampling_knn_rncf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbbcfn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mNsample\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m50\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      2\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdfbbc3\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnege\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnege\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m   5987\u001b[0m         ):\n\u001b[1;32m   5988\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 5989\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__getattribute__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   5990\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   5991\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m__setattr__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mAttributeError\u001b[0m: 'DataFrame' object has no attribute 'nege'"
     ]
    }
   ],
   "source": [
    "# Run the repeated-sampling KNN experiment on the file named by `bbcfn`\n",
    "# (50 sampling rounds) and print (mean, std) of the `nege` column of the result.\n",
    "# NOTE(review): the traceback saved with this cell was produced by a call to\n",
    "# `repeatSampling_knn_rncf` and failed on `df.nege`; the stored output looks\n",
    "# stale relative to this source -- re-run to confirm `nege` exists here.\n",
    "df = dfbbc3 = repeatSampling_knn(bbcfn, Nsample=50)\n",
    "print(tuple(stat(df.nege) for stat in (np.mean, np.std)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "0f8d545b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(0.21306122448979592, 0.029883696380498852)\n"
     ]
    }
   ],
   "source": [
    "# Print (mean, std) of the `rncf` column of `df`; `df` must already be\n",
    "# defined by an earlier cell.\n",
    "print(tuple(stat(df.rncf) for stat in (np.mean, np.std)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fae00425",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b9f82c4c",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "f05ef529",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " ... 0 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12088,)\n",
      "len of dfs: 12088\n",
      "the vocabulary size =:12088\n",
      "time passed:6243.641940832138\n",
      "DC0:base entropy=-1.525430134739167, DC0:basete troenpy=-0.2714576206697955\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12088\n",
      "fle max=-2.03827744820029\n",
      "normalizing bias features ...\n",
      "number of ebias features =12088\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 1 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12036,)\n",
      "len of dfs: 12036\n",
      "the vocabulary size =:12036\n",
      "time passed:6247.578988075256\n",
      "DC0:base entropy=-1.523236807255611, DC0:basete troenpy=-0.2733108103733334\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12036\n",
      "fle max=-1.924571730621087\n",
      "normalizing bias features ...\n",
      "number of ebias features =12036\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 2 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11888,)\n",
      "len of dfs: 11888\n",
      "the vocabulary size =:11888\n",
      "time passed:6251.452685117722\n",
      "DC0:base entropy=-1.523667955137478, DC0:basete troenpy=-0.2744294829926603\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11888\n",
      "fle max=-1.9901348549215814\n",
      "normalizing bias features ...\n",
      "number of ebias features =11888\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 3 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11927,)\n",
      "len of dfs: 11927\n",
      "the vocabulary size =:11927\n",
      "time passed:6255.253155946732\n",
      "DC0:base entropy=-1.5216910643133459, DC0:basete troenpy=-0.2754133294500931\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11927\n",
      "fle max=-2.024553240039064\n",
      "normalizing bias features ...\n",
      "number of ebias features =11927\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 4 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11735,)\n",
      "len of dfs: 11735\n",
      "the vocabulary size =:11735\n",
      "time passed:6259.268173933029\n",
      "DC0:base entropy=-1.533085501954518, DC0:basete troenpy=-0.26626453504062153\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11735\n",
      "fle max=-1.9344204253199941\n",
      "normalizing bias features ...\n",
      "number of ebias features =11735\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 5 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11976,)\n",
      "len of dfs: 11976\n",
      "the vocabulary size =:11976\n",
      "time passed:6263.166846990585\n",
      "DC0:base entropy=-1.5169935674007489, DC0:basete troenpy=-0.2782057837065163\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11976\n",
      "fle max=-2.0222117446148262\n",
      "normalizing bias features ...\n",
      "number of ebias features =11976\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 6 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11799,)\n",
      "len of dfs: 11799\n",
      "the vocabulary size =:11799\n",
      "time passed:6267.113537073135\n",
      "DC0:base entropy=-1.5323253025108183, DC0:basete troenpy=-0.2673874798051114\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11799\n",
      "fle max=-2.0124877346910788\n",
      "normalizing bias features ...\n",
      "number of ebias features =11799\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 7 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11877,)\n",
      "len of dfs: 11877\n",
      "the vocabulary size =:11877\n",
      "time passed:6271.13222694397\n",
      "DC0:base entropy=-1.5406794229065537, DC0:basete troenpy=-0.26129093668481185\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11877\n",
      "fle max=-2.0342218707514395\n",
      "normalizing bias features ...\n",
      "number of ebias features =11877\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 8 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11892,)\n",
      "len of dfs: 11892\n",
      "the vocabulary size =:11892\n",
      "time passed:6275.148646116257\n",
      "DC0:base entropy=-1.5131794806877665, DC0:basete troenpy=-0.27930013499721373\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11892\n",
      "fle max=-1.9107957206219848\n",
      "normalizing bias features ...\n",
      "number of ebias features =11892\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 9 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11884,)\n",
      "len of dfs: 11884\n",
      "the vocabulary size =:11884\n",
      "time passed:6279.217522859573\n",
      "DC0:base entropy=-1.5293106025200447, DC0:basete troenpy=-0.26867740846873145\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11884\n",
      "fle max=-2.0431727214187148\n",
      "normalizing bias features ...\n",
      "number of ebias features =11884\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 10 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11758,)\n",
      "len of dfs: 11758\n",
      "the vocabulary size =:11758\n",
      "time passed:6283.32578086853\n",
      "DC0:base entropy=-1.5186802169438356, DC0:basete troenpy=-0.2767560784878685\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =11758\n",
      "fle max=-1.9311827857974722\n",
      "normalizing bias features ...\n",
      "number of ebias features =11758\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 11 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11754,)\n",
      "len of dfs: 11754\n",
      "the vocabulary size =:11754\n",
      "time passed:6287.409645080566\n",
      "DC0:base entropy=-1.5080406021551176, DC0:basete troenpy=-0.2878189139615161\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11754\n",
      "fle max=-1.9364035208054384\n",
      "normalizing bias features ...\n",
      "number of ebias features =11754\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 12 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11947,)\n",
      "len of dfs: 11947\n",
      "the vocabulary size =:11947\n",
      "time passed:6291.392122030258\n",
      "DC0:base entropy=-1.528325308347834, DC0:basete troenpy=-0.2704625244323187\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11947\n",
      "fle max=-1.9776285230432507\n",
      "normalizing bias features ...\n",
      "number of ebias features =11947\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 13 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11812,)\n",
      "len of dfs: 11812\n",
      "the vocabulary size =:11812\n",
      "time passed:6295.558495044708\n",
      "DC0:base entropy=-1.5285091926853136, DC0:basete troenpy=-0.26964443062134347\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11812\n",
      "fle max=-2.009268915209643\n",
      "normalizing bias features ...\n",
      "number of ebias features =11812\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 14 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11987,)\n",
      "len of dfs: 11987\n",
      "the vocabulary size =:11987\n",
      "time passed:6299.687391042709\n",
      "DC0:base entropy=-1.519149232682894, DC0:basete troenpy=-0.2768563872893171\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11987\n",
      "fle max=-1.9802666037059033\n",
      "normalizing bias features ...\n",
      "number of ebias features =11987\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 15 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11730,)\n",
      "len of dfs: 11730\n",
      "the vocabulary size =:11730\n",
      "time passed:6303.71791100502\n",
      "DC0:base entropy=-1.5239456199686991, DC0:basete troenpy=-0.27277804587346194\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11730\n",
      "fle max=-1.9639346950296912\n",
      "normalizing bias features ...\n",
      "number of ebias features =11730\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 16 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11779,)\n",
      "len of dfs: 11779\n",
      "the vocabulary size =:11779\n",
      "time passed:6307.787277936935\n",
      "DC0:base entropy=-1.5267862060786768, DC0:basete troenpy=-0.27074475690191413\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11779\n",
      "fle max=-2.0239160079610437\n",
      "normalizing bias features ...\n",
      "number of ebias features =11779\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 17 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12003,)\n",
      "len of dfs: 12003\n",
      "the vocabulary size =:12003\n",
      "time passed:6311.794903039932\n",
      "DC0:base entropy=-1.5251249179328263, DC0:basete troenpy=-0.272848976134066\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12003\n",
      "fle max=-2.02436923694719\n",
      "normalizing bias features ...\n",
      "number of ebias features =12003\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 18 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11965,)\n",
      "len of dfs: 11965\n",
      "the vocabulary size =:11965\n",
      "time passed:6315.792536973953\n",
      "DC0:base entropy=-1.52442705026595, DC0:basete troenpy=-0.27310168757006614\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11965\n",
      "fle max=-1.9791575484168074\n",
      "normalizing bias features ...\n",
      "number of ebias features =11965\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 19 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12012,)\n",
      "len of dfs: 12012\n",
      "the vocabulary size =:12012\n",
      "time passed:6319.914057970047\n",
      "DC0:base entropy=-1.5237602565874824, DC0:basete troenpy=-0.2742769797713128\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12012\n",
      "fle max=-1.9194339503741973\n",
      "normalizing bias features ...\n",
      "number of ebias features =12012\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 20 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11868,)\n",
      "len of dfs: 11868\n",
      "the vocabulary size =:11868\n",
      "time passed:6323.990984916687\n",
      "DC0:base entropy=-1.5206174172474012, DC0:basete troenpy=-0.27582440001205677\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11868\n",
      "fle max=-2.0041939514342646\n",
      "normalizing bias features ...\n",
      "number of ebias features =11868\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 21 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11824,)\n",
      "len of dfs: 11824\n",
      "the vocabulary size =:11824\n",
      "time passed:6327.985620975494\n",
      "DC0:base entropy=-1.533445869246617, DC0:basete troenpy=-0.26567465140341245\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =11824\n",
      "fle max=-1.936706694203343\n",
      "normalizing bias features ...\n",
      "number of ebias features =11824\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 22 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11949,)\n",
      "len of dfs: 11949\n",
      "the vocabulary size =:11949\n",
      "time passed:6331.985563993454\n",
      "DC0:base entropy=-1.5235258286123106, DC0:basete troenpy=-0.2734275266773476\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11949\n",
      "fle max=-1.9607902394736239\n",
      "normalizing bias features ...\n",
      "number of ebias features =11949\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 23 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11929,)\n",
      "len of dfs: 11929\n",
      "the vocabulary size =:11929\n",
      "time passed:6336.018847942352\n",
      "DC0:base entropy=-1.5147269700814898, DC0:basete troenpy=-0.2799298766558266\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11929\n",
      "fle max=-2.045314325134317\n",
      "normalizing bias features ...\n",
      "number of ebias features =11929\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 24 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11826,)\n",
      "len of dfs: 11826\n",
      "the vocabulary size =:11826\n",
      "time passed:6340.047145843506\n",
      "DC0:base entropy=-1.5218534454962083, DC0:basete troenpy=-0.27575190914891756\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11826\n",
      "fle max=-1.9864587260269149\n",
      "normalizing bias features ...\n",
      "number of ebias features =11826\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 25 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11979,)\n",
      "len of dfs: 11979\n",
      "the vocabulary size =:11979\n",
      "time passed:6343.987420082092\n",
      "DC0:base entropy=-1.5182206085646222, DC0:basete troenpy=-0.27740700752412467\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11979\n",
      "fle max=-1.9090246082096483\n",
      "normalizing bias features ...\n",
      "number of ebias features =11979\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 26 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11887,)\n",
      "len of dfs: 11887\n",
      "the vocabulary size =:11887\n",
      "time passed:6348.024257898331\n",
      "DC0:base entropy=-1.52376117935821, DC0:basete troenpy=-0.2723485519266351\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11887\n",
      "fle max=-2.0173036272030958\n",
      "normalizing bias features ...\n",
      "number of ebias features =11887\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 27 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11821,)\n",
      "len of dfs: 11821\n",
      "the vocabulary size =:11821\n",
      "time passed:6352.1447541713715\n",
      "DC0:base entropy=-1.5258030131162077, DC0:basete troenpy=-0.27111921657435056\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11821\n",
      "fle max=-2.08368572157559\n",
      "normalizing bias features ...\n",
      "number of ebias features =11821\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 28 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11938,)\n",
      "len of dfs: 11938\n",
      "the vocabulary size =:11938\n",
      "time passed:6356.087079048157\n",
      "DC0:base entropy=-1.5284397940088639, DC0:basete troenpy=-0.2699943248510018\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11938\n",
      "fle max=-1.9651386343782968\n",
      "normalizing bias features ...\n",
      "number of ebias features =11938\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 29 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11848,)\n",
      "len of dfs: 11848\n",
      "the vocabulary size =:11848\n",
      "time passed:6360.019552946091\n",
      "DC0:base entropy=-1.521827387337415, DC0:basete troenpy=-0.27476272211817754\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11848\n",
      "fle max=-2.0398899652854756\n",
      "normalizing bias features ...\n",
      "number of ebias features =11848\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 30 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11761,)\n",
      "len of dfs: 11761\n",
      "the vocabulary size =:11761\n",
      "time passed:6364.018883943558\n",
      "DC0:base entropy=-1.5237815547044415, DC0:basete troenpy=-0.2739616486525717\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11761\n",
      "fle max=-1.9730847693998577\n",
      "normalizing bias features ...\n",
      "number of ebias features =11761\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 31 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11800,)\n",
      "len of dfs: 11800\n",
      "the vocabulary size =:11800\n",
      "time passed:6368.0578989982605\n",
      "DC0:base entropy=-1.5237931688011366, DC0:basete troenpy=-0.2729082146994095\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11800\n",
      "fle max=-1.9762211757644361\n",
      "normalizing bias features ...\n",
      "number of ebias features =11800\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 32 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11901,)\n",
      "len of dfs: 11901\n",
      "the vocabulary size =:11901\n",
      "time passed:6372.3481550216675\n",
      "DC0:base entropy=-1.5179714450381008, DC0:basete troenpy=-0.2784417127382852\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =11901\n",
      "fle max=-1.957960520099093\n",
      "normalizing bias features ...\n",
      "number of ebias features =11901\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 33 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11874,)\n",
      "len of dfs: 11874\n",
      "the vocabulary size =:11874\n",
      "time passed:6376.353543996811\n",
      "DC0:base entropy=-1.524966376733178, DC0:basete troenpy=-0.27336847939136555\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11874\n",
      "fle max=-2.001810179541154\n",
      "normalizing bias features ...\n",
      "number of ebias features =11874\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 34 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11857,)\n",
      "len of dfs: 11857\n",
      "the vocabulary size =:11857\n",
      "time passed:6380.237900018692\n",
      "DC0:base entropy=-1.539239735213391, DC0:basete troenpy=-0.26124032696113797\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11857\n",
      "fle max=-1.933304449339058\n",
      "normalizing bias features ...\n",
      "number of ebias features =11857\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 35 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11924,)\n",
      "len of dfs: 11924\n",
      "the vocabulary size =:11924\n",
      "time passed:6384.092698097229\n",
      "DC0:base entropy=-1.523744483853739, DC0:basete troenpy=-0.2738872078482036\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11924\n",
      "fle max=-1.966163088088762\n",
      "normalizing bias features ...\n",
      "number of ebias features =11924\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 36 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11986,)\n",
      "len of dfs: 11986\n",
      "the vocabulary size =:11986\n",
      "time passed:6388.161724090576\n",
      "DC0:base entropy=-1.514546389839972, DC0:basete troenpy=-0.281514630132203\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11986\n",
      "fle max=-1.8993355384839514\n",
      "normalizing bias features ...\n",
      "number of ebias features =11986\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 37 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11943,)\n",
      "len of dfs: 11943\n",
      "the vocabulary size =:11943\n",
      "time passed:6392.399282217026\n",
      "DC0:base entropy=-1.5346380997699571, DC0:basete troenpy=-0.2642943998129713\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11943\n",
      "fle max=-1.9931768923455744\n",
      "normalizing bias features ...\n",
      "number of ebias features =11943\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 38 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11908,)\n",
      "len of dfs: 11908\n",
      "the vocabulary size =:11908\n",
      "time passed:6396.586941957474\n",
      "DC0:base entropy=-1.53329638891068, DC0:basete troenpy=-0.2674513538559265\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11908\n",
      "fle max=-2.035324672614288\n",
      "normalizing bias features ...\n",
      "number of ebias features =11908\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 39 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11890,)\n",
      "len of dfs: 11890\n",
      "the vocabulary size =:11890\n",
      "time passed:6400.537034034729\n",
      "DC0:base entropy=-1.528356098438656, DC0:basete troenpy=-0.2700230232225968\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11890\n",
      "fle max=-1.9776593131340725\n",
      "normalizing bias features ...\n",
      "number of ebias features =11890\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 40 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11891,)\n",
      "len of dfs: 11891\n",
      "the vocabulary size =:11891\n",
      "time passed:6404.571583032608\n",
      "DC0:base entropy=-1.5178760343211535, DC0:basete troenpy=-0.2774495506452295\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11891\n",
      "fle max=-1.9711350979926392\n",
      "normalizing bias features ...\n",
      "number of ebias features =11891\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 41 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11959,)\n",
      "len of dfs: 11959\n",
      "the vocabulary size =:11959\n",
      "time passed:6408.581543207169\n",
      "DC0:base entropy=-1.5273786093305168, DC0:basete troenpy=-0.2701878388765896\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11959\n",
      "fle max=-1.9716527623330897\n",
      "normalizing bias features ...\n",
      "number of ebias features =11959\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 42 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11918,)\n",
      "len of dfs: 11918\n",
      "the vocabulary size =:11918\n",
      "time passed:6412.598116159439\n",
      "DC0:base entropy=-1.5308671230706372, DC0:basete troenpy=-0.2689526422916306\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11918\n",
      "fle max=-2.0328954067742453\n",
      "normalizing bias features ...\n",
      "number of ebias features =11918\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 43 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11813,)\n",
      "len of dfs: 11813\n",
      "the vocabulary size =:11813\n",
      "time passed:6416.8850219249725\n",
      "DC0:base entropy=-1.5189704548490484, DC0:basete troenpy=-0.2764169891818712\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =11813\n",
      "fle max=-1.9924450257956339\n",
      "normalizing bias features ...\n",
      "number of ebias features =11813\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 44 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11714,)\n",
      "len of dfs: 11714\n",
      "the vocabulary size =:11714\n",
      "time passed:6420.972189903259\n",
      "DC0:base entropy=-1.5181035958976508, DC0:basete troenpy=-0.27785059290179415\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11714\n",
      "fle max=-1.9646028486449143\n",
      "normalizing bias features ...\n",
      "number of ebias features =11714\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 45 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11984,)\n",
      "len of dfs: 11984\n",
      "the vocabulary size =:11984\n",
      "time passed:6425.090736150742\n",
      "DC0:base entropy=-1.5337819058557005, DC0:basete troenpy=-0.2656413568858063\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11984\n",
      "fle max=-1.9886965446238982\n",
      "normalizing bias features ...\n",
      "number of ebias features =11984\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 46 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11936,)\n",
      "len of dfs: 11936\n",
      "the vocabulary size =:11936\n",
      "time passed:6429.1794102191925\n",
      "DC0:base entropy=-1.5193624338148224, DC0:basete troenpy=-0.27783486835720445\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11936\n",
      "fle max=-1.9779012263904399\n",
      "normalizing bias features ...\n",
      "number of ebias features =11936\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 47 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11802,)\n",
      "len of dfs: 11802\n",
      "the vocabulary size =:11802\n",
      "time passed:6433.34231710434\n",
      "DC0:base entropy=-1.5086811959402338, DC0:basete troenpy=-0.28417983637703287\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11802\n",
      "fle max=-1.9028051779528932\n",
      "normalizing bias features ...\n",
      "number of ebias features =11802\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 48 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11833,)\n",
      "len of dfs: 11833\n",
      "the vocabulary size =:11833\n",
      "time passed:6437.3117208480835\n",
      "DC0:base entropy=-1.5302798739534738, DC0:basete troenpy=-0.26873013951897495\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11833\n",
      "fle max=-1.9907096211199613\n",
      "normalizing bias features ...\n",
      "number of ebias features =11833\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 49 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_football--->0\n",
      "class_tennis--->1\n",
      "class_rugby--->2\n",
      "class_cricket--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11712,)\n",
      "len of dfs: 11712\n",
      "the vocabulary size =:11712\n",
      "time passed:6441.652436971664\n",
      "DC0:base entropy=-1.520781646566001, DC0:basete troenpy=-0.27672109134308875\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11712\n",
      "fle max=-1.9295994500617315\n",
      "normalizing bias features ...\n",
      "number of ebias features =11712\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "(0.011292517006802734, 0.008670200798016238)\n"
     ]
    }
   ],
   "source": [
    "# Repeat the KNN sampling experiment 50 times on the dataset file named by `bbcfn`.\n",
    "# `df` is rebound to the same result object as `dfbbc2`.\n",
    "df = dfbbc2 = repeatSampling_knn(bbcfn, Nsample=50)\n",
    "# Print (mean, std) of the `nege` field over all samplings.\n",
    "# NOTE(review): `nege` is presumably a per-sampling score column produced by\n",
    "# repeatSampling_knn -- confirm against that function's definition.\n",
    "print(tuple(reduce_fn(df.nege) for reduce_fn in (np.mean, np.std)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9753e10d",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "068e94b6",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " ... 0 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35899,)\n",
      "len of dfs: 35899\n",
      "the vocabulary size =:35899\n",
      "time passed:6616.720417022705\n",
      "DC0:base entropy=-1.3856077150363717, DC0:basete troenpy=-0.28752205163310374\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35899\n",
      "fle max=-1.5293046031240087\n",
      "normalizing bias features ...\n",
      "number of ebias features =35899\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 1 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36096,)\n",
      "len of dfs: 36096\n",
      "the vocabulary size =:36096\n",
      "time passed:6676.233782052994\n",
      "DC0:base entropy=-1.385660092799144, DC0:basete troenpy=-0.28748113042294976\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36096\n",
      "fle max=-1.5475285821750855\n",
      "normalizing bias features ...\n",
      "number of ebias features =36096\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 2 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36116,)\n",
      "len of dfs: 36116\n",
      "the vocabulary size =:36116\n",
      "time passed:6735.725136041641\n",
      "DC0:base entropy=-1.38566203570063, DC0:basete troenpy=-0.2874796075059003\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36116\n",
      "fle max=-1.6700803015353418\n",
      "normalizing bias features ...\n",
      "number of ebias features =36116\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 3 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36163,)\n",
      "len of dfs: 36163\n",
      "the vocabulary size =:36163\n",
      "time passed:6794.686660051346\n",
      "DC0:base entropy=-1.385633214957124, DC0:basete troenpy=-0.2875020042380207\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36163\n",
      "fle max=-1.5258713135416337\n",
      "normalizing bias features ...\n",
      "number of ebias features =36163\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 4 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35873,)\n",
      "len of dfs: 35873\n",
      "the vocabulary size =:35873\n",
      "time passed:6853.249130964279\n",
      "DC0:base entropy=-1.3856223895360458, DC0:basete troenpy=-0.2875104391925653\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35873\n",
      "fle max=-1.5269929112873175\n",
      "normalizing bias features ...\n",
      "number of ebias features =35873\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 5 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36240,)\n",
      "len of dfs: 36240\n",
      "the vocabulary size =:36240\n",
      "time passed:6910.003839015961\n",
      "DC0:base entropy=-1.3855905145339311, DC0:basete troenpy=-0.28753494913973277\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36240\n",
      "fle max=-1.5479007774953393\n",
      "normalizing bias features ...\n",
      "number of ebias features =36240\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 6 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36089,)\n",
      "len of dfs: 36089\n",
      "the vocabulary size =:36089\n",
      "time passed:6966.5330312252045\n",
      "DC0:base entropy=-1.3855531297969268, DC0:basete troenpy=-0.2875643891989715\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36089\n",
      "fle max=-1.5422782551868557\n",
      "normalizing bias features ...\n",
      "number of ebias features =36089\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 7 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35981,)\n",
      "len of dfs: 35981\n",
      "the vocabulary size =:35981\n",
      "time passed:7022.676912069321\n",
      "DC0:base entropy=-1.3856231977976015, DC0:basete troenpy=-0.2875097272245785\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35981\n",
      "fle max=-1.644627982361128\n",
      "normalizing bias features ...\n",
      "number of ebias features =35981\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 8 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36329,)\n",
      "len of dfs: 36329\n",
      "the vocabulary size =:36329\n",
      "time passed:7078.832375049591\n",
      "DC0:base entropy=-1.385568372680413, DC0:basete troenpy=-0.28755208334568816\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36329\n",
      "fle max=-1.5225256889412513\n",
      "normalizing bias features ...\n",
      "number of ebias features =36329\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 9 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36123,)\n",
      "len of dfs: 36123\n",
      "the vocabulary size =:36123\n",
      "time passed:7135.642252922058\n",
      "DC0:base entropy=-1.3856467746302232, DC0:basete troenpy=-0.2874914429679276\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36123\n",
      "fle max=-1.5649748793864438\n",
      "normalizing bias features ...\n",
      "number of ebias features =36123\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 10 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36101,)\n",
      "len of dfs: 36101\n",
      "the vocabulary size =:36101\n",
      "time passed:7194.291619062424\n",
      "DC0:base entropy=-1.3856067911437226, DC0:basete troenpy=-0.28752236969432254\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36101\n",
      "fle max=-1.5345093392064064\n",
      "normalizing bias features ...\n",
      "number of ebias features =36101\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 11 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "shape of dfs:(35810,)\n",
      "len of dfs: 35810\n",
      "the vocabulary size =:35810\n",
      "time passed:7251.850322961807\n",
      "DC0:base entropy=-1.3856231977976015, DC0:basete troenpy=-0.2875097272245785\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35810\n",
      "fle max=-1.528146458747833\n",
      "normalizing bias features ...\n",
      "number of ebias features =35810\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 12 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36487,)\n",
      "len of dfs: 36487\n",
      "the vocabulary size =:36487\n",
      "time passed:7307.968333005905\n",
      "DC0:base entropy=-1.3856467746302232, DC0:basete troenpy=-0.2874914429679276\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36487\n",
      "fle max=-1.5511252304645908\n",
      "normalizing bias features ...\n",
      "number of ebias features =36487\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 13 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35934,)\n",
      "len of dfs: 35934\n",
      "the vocabulary size =:35934\n",
      "time passed:7365.658348083496\n",
      "DC0:base entropy=-1.3856397598607486, DC0:basete troenpy=-0.28749691712743586\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35934\n",
      "fle max=-1.5499147819156207\n",
      "normalizing bias features ...\n",
      "number of ebias features =35934\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 14 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36126,)\n",
      "len of dfs: 36126\n",
      "the vocabulary size =:36126\n",
      "time passed:7421.711940050125\n",
      "DC0:base entropy=-1.3856381426962388, DC0:basete troenpy=-0.2874982256917027\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36126\n",
      "fle max=-1.5904591953399119\n",
      "normalizing bias features ...\n",
      "number of ebias features =36126\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 15 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35764,)\n",
      "len of dfs: 35764\n",
      "the vocabulary size =:35764\n",
      "time passed:7478.141494035721\n",
      "DC0:base entropy=-1.385628133806815, DC0:basete troenpy=-0.2875060676991585\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35764\n",
      "fle max=-1.5269986555580868\n",
      "normalizing bias features ...\n",
      "number of ebias features =35764\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 16 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36080,)\n",
      "len of dfs: 36080\n",
      "the vocabulary size =:36080\n",
      "time passed:7533.501659154892\n",
      "DC0:base entropy=-1.385576829619485, DC0:basete troenpy=-0.28754534502829293\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36080\n",
      "fle max=-1.5395432426218614\n",
      "normalizing bias features ...\n",
      "number of ebias features =36080\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 17 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35883,)\n",
      "len of dfs: 35883\n",
      "the vocabulary size =:35883\n",
      "time passed:7589.677088022232\n",
      "DC0:base entropy=-1.385602014539929, DC0:basete troenpy=-0.28752609407510543\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35883\n",
      "fle max=-1.5281252754901606\n",
      "normalizing bias features ...\n",
      "number of ebias features =35883\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 18 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36228,)\n",
      "len of dfs: 36228\n",
      "the vocabulary size =:36228\n",
      "time passed:7646.376266002655\n",
      "DC0:base entropy=-1.3855992491328308, DC0:basete troenpy=-0.2875285550961263\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36228\n",
      "fle max=-1.5292961372204679\n",
      "normalizing bias features ...\n",
      "number of ebias features =36228\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 19 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35980,)\n",
      "len of dfs: 35980\n",
      "the vocabulary size =:35980\n",
      "time passed:7702.800304174423\n",
      "DC0:base entropy=-1.3856669211542723, DC0:basete troenpy=-0.2874758117321927\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35980\n",
      "fle max=-1.550684479261633\n",
      "normalizing bias features ...\n",
      "number of ebias features =35980\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 20 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36212,)\n",
      "len of dfs: 36212\n",
      "the vocabulary size =:36212\n",
      "time passed:7759.289204835892\n",
      "DC0:base entropy=-1.3856361543634708, DC0:basete troenpy=-0.28749972997540685\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36212\n",
      "fle max=-1.5317453596617128\n",
      "normalizing bias features ...\n",
      "number of ebias features =36212\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 21 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36100,)\n",
      "len of dfs: 36100\n",
      "the vocabulary size =:36100\n",
      "time passed:7815.743576049805\n",
      "DC0:base entropy=-1.38551915878176, DC0:basete troenpy=-0.2875912883780072\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36100\n",
      "fle max=-1.5679185154427717\n",
      "normalizing bias features ...\n",
      "number of ebias features =36100\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 22 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35685,)\n",
      "len of dfs: 35685\n",
      "the vocabulary size =:35685\n",
      "time passed:7872.039357185364\n",
      "DC0:base entropy=-1.3855912509413784, DC0:basete troenpy=-0.28753432362502435\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =35685\n",
      "fle max=-1.5565124137528357\n",
      "normalizing bias features ...\n",
      "number of ebias features =35685\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 23 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36358,)\n",
      "len of dfs: 36358\n",
      "the vocabulary size =:36358\n",
      "time passed:7927.207575082779\n",
      "DC0:base entropy=-1.385633320048972, DC0:basete troenpy=-0.28750181584612233\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36358\n",
      "fle max=-1.5270038418002434\n",
      "normalizing bias features ...\n",
      "number of ebias features =36358\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 24 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35941,)\n",
      "len of dfs: 35941\n",
      "the vocabulary size =:35941\n",
      "time passed:7983.84864616394\n",
      "DC0:base entropy=-1.385619141049533, DC0:basete troenpy=-0.2875130506632389\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35941\n",
      "fle max=-1.52861105676633\n",
      "normalizing bias features ...\n",
      "number of ebias features =35941\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 25 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36109,)\n",
      "len of dfs: 36109\n",
      "the vocabulary size =:36109\n",
      "time passed:8040.231012105942\n",
      "DC0:base entropy=-1.3856587130761895, DC0:basete troenpy=-0.28748218844697104\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36109\n",
      "fle max=-1.6623566258161508\n",
      "normalizing bias features ...\n",
      "number of ebias features =36109\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 26 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35929,)\n",
      "len of dfs: 35929\n",
      "the vocabulary size =:35929\n",
      "time passed:8105.124717950821\n",
      "DC0:base entropy=-1.3856350831511708, DC0:basete troenpy=-0.2875005666035559\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35929\n",
      "fle max=-1.5409407939826536\n",
      "normalizing bias features ...\n",
      "number of ebias features =35929\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 27 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36011,)\n",
      "len of dfs: 36011\n",
      "the vocabulary size =:36011\n",
      "time passed:8170.2236568927765\n",
      "DC0:base entropy=-1.3856322469473263, DC0:basete troenpy=-0.28750288304384414\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36011\n",
      "fle max=-1.5286241626641228\n",
      "normalizing bias features ...\n",
      "number of ebias features =36011\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 28 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36022,)\n",
      "len of dfs: 36022\n",
      "the vocabulary size =:36022\n",
      "time passed:8235.168048143387\n",
      "DC0:base entropy=-1.3856620433751445, DC0:basete troenpy=-0.2874796106405887\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36022\n",
      "fle max=-1.545789510156879\n",
      "normalizing bias features ...\n",
      "number of ebias features =36022\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 29 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36411,)\n",
      "len of dfs: 36411\n",
      "the vocabulary size =:36411\n",
      "time passed:8299.849227905273\n",
      "DC0:base entropy=-1.3856576597713022, DC0:basete troenpy=-0.2874830322809076\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36411\n",
      "fle max=-1.5305496569224466\n",
      "normalizing bias features ...\n",
      "number of ebias features =36411\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 30 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36260,)\n",
      "len of dfs: 36260\n",
      "the vocabulary size =:36260\n",
      "time passed:8366.051077127457\n",
      "DC0:base entropy=-1.3856482670999988, DC0:basete troenpy=-0.2874903150714796\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36260\n",
      "fle max=-1.54366836718055\n",
      "normalizing bias features ...\n",
      "number of ebias features =36260\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 31 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36113,)\n",
      "len of dfs: 36113\n",
      "the vocabulary size =:36113\n",
      "time passed:8431.92796587944\n",
      "DC0:base entropy=-1.3856067273661687, DC0:basete troenpy=-0.28752257455527763\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36113\n",
      "fle max=-1.6023956460723134\n",
      "normalizing bias features ...\n",
      "number of ebias features =36113\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 32 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36102,)\n",
      "len of dfs: 36102\n",
      "the vocabulary size =:36102\n",
      "time passed:8497.48556303978\n",
      "DC0:base entropy=-1.3856540340941716, DC0:basete troenpy=-0.2874858368975184\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36102\n",
      "fle max=-1.531763239392414\n",
      "normalizing bias features ...\n",
      "number of ebias features =36102\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 33 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36194,)\n",
      "len of dfs: 36194\n",
      "the vocabulary size =:36194\n",
      "time passed:8563.277685880661\n",
      "DC0:base entropy=-1.3856453925794527, DC0:basete troenpy=-0.2874925000488896\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36194\n",
      "fle max=-1.561955772108208\n",
      "normalizing bias features ...\n",
      "number of ebias features =36194\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "knn predicting ...\n",
      " ... 34 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36161,)\n",
      "len of dfs: 36161\n",
      "the vocabulary size =:36161\n",
      "time passed:8628.129122972488\n",
      "DC0:base entropy=-1.38557042482341, DC0:basete troenpy=-0.2875506044203649\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36161\n",
      "fle max=-1.5614558790448267\n",
      "normalizing bias features ...\n",
      "number of ebias features =36161\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 35 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35991,)\n",
      "len of dfs: 35991\n",
      "the vocabulary size =:35991\n",
      "time passed:8693.582593917847\n",
      "DC0:base entropy=-1.3855797164488723, DC0:basete troenpy=-0.28754316489725906\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35991\n",
      "fle max=-1.6311755801283483\n",
      "normalizing bias features ...\n",
      "number of ebias features =35991\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 36 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36289,)\n",
      "len of dfs: 36289\n",
      "the vocabulary size =:36289\n",
      "time passed:8759.112164020538\n",
      "DC0:base entropy=-1.3855450263558842, DC0:basete troenpy=-0.28757138953095734\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36289\n",
      "fle max=-1.5547495068318966\n",
      "normalizing bias features ...\n",
      "number of ebias features =36289\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 37 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35827,)\n",
      "len of dfs: 35827\n",
      "the vocabulary size =:35827\n",
      "time passed:8823.384949922562\n",
      "DC0:base entropy=-1.385564576500824, DC0:basete troenpy=-0.28755470391246035\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35827\n",
      "fle max=-1.5510430323351914\n",
      "normalizing bias features ...\n",
      "number of ebias features =35827\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 38 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36057,)\n",
      "len of dfs: 36057\n",
      "the vocabulary size =:36057\n",
      "time passed:8888.759876012802\n",
      "DC0:base entropy=-1.3856351072421573, DC0:basete troenpy=-0.28750057646112687\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36057\n",
      "fle max=-1.5230227715894296\n",
      "normalizing bias features ...\n",
      "number of ebias features =36057\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 39 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35996,)\n",
      "len of dfs: 35996\n",
      "the vocabulary size =:35996\n",
      "time passed:8954.914696931839\n",
      "DC0:base entropy=-1.3856396387912868, DC0:basete troenpy=-0.2874969836596943\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35996\n",
      "fle max=-1.5355396068832692\n",
      "normalizing bias features ...\n",
      "number of ebias features =35996\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 40 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35850,)\n",
      "len of dfs: 35850\n",
      "the vocabulary size =:35850\n",
      "time passed:9019.92132806778\n",
      "DC0:base entropy=-1.3855900409506732, DC0:basete troenpy=-0.28753591350426355\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35850\n",
      "fle max=-1.5534160851483332\n",
      "normalizing bias features ...\n",
      "number of ebias features =35850\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 41 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36007,)\n",
      "len of dfs: 36007\n",
      "the vocabulary size =:36007\n",
      "time passed:9085.127487897873\n",
      "DC0:base entropy=-1.3855925456332967, DC0:basete troenpy=-0.2875332310855051\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36007\n",
      "fle max=-1.5342050559492975\n",
      "normalizing bias features ...\n",
      "number of ebias features =36007\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 42 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36012,)\n",
      "len of dfs: 36012\n",
      "the vocabulary size =:36012\n",
      "time passed:9151.071646928787\n",
      "DC0:base entropy=-1.3855766166102366, DC0:basete troenpy=-0.2875456052571296\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36012\n",
      "fle max=-1.5236086336336687\n",
      "normalizing bias features ...\n",
      "number of ebias features =36012\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 43 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36025,)\n",
      "len of dfs: 36025\n",
      "the vocabulary size =:36025\n",
      "time passed:9215.889302015305\n",
      "DC0:base entropy=-1.385632005480514, DC0:basete troenpy=-0.2875030160199617\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36025\n",
      "fle max=-1.536844254922039\n",
      "normalizing bias features ...\n",
      "number of ebias features =36025\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 44 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35816,)\n",
      "len of dfs: 35816\n",
      "the vocabulary size =:35816\n",
      "time passed:9281.553842782974\n",
      "DC0:base entropy=-1.3856417708370432, DC0:basete troenpy=-0.28749542202910333\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35816\n",
      "fle max=-1.5247672114349182\n",
      "normalizing bias features ...\n",
      "number of ebias features =35816\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "knn predicting ...\n",
      " ... 45 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35988,)\n",
      "len of dfs: 35988\n",
      "the vocabulary size =:35988\n",
      "time passed:9346.541703939438\n",
      "DC0:base entropy=-1.3856607574717559, DC0:basete troenpy=-0.28748059116038055\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35988\n",
      "fle max=-1.5615462116931726\n",
      "normalizing bias features ...\n",
      "number of ebias features =35988\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 46 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35848,)\n",
      "len of dfs: 35848\n",
      "the vocabulary size =:35848\n",
      "time passed:9411.792667150497\n",
      "DC0:base entropy=-1.3856186739670078, DC0:basete troenpy=-0.28751343907724136\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35848\n",
      "fle max=-1.6439357686587954\n",
      "normalizing bias features ...\n",
      "number of ebias features =35848\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 47 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35834,)\n",
      "len of dfs: 35834\n",
      "the vocabulary size =:35834\n",
      "time passed:9475.822875022888\n",
      "DC0:base entropy=-1.3855942262761438, DC0:basete troenpy=-0.2875321789933184\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35834\n",
      "fle max=-1.5510726821105116\n",
      "normalizing bias features ...\n",
      "number of ebias features =35834\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 48 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36090,)\n",
      "len of dfs: 36090\n",
      "the vocabulary size =:36090\n",
      "time passed:9540.969898939133\n",
      "DC0:base entropy=-1.3856607672497483, DC0:basete troenpy=-0.2874805951349151\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36090\n",
      "fle max=-1.559643359644311\n",
      "normalizing bias features ...\n",
      "number of ebias features =36090\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 49 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "dvd--->0\n",
      "kitchen--->1\n",
      "books--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35858,)\n",
      "len of dfs: 35858\n",
      "the vocabulary size =:35858\n",
      "time passed:9598.29415178299\n",
      "DC0:base entropy=-1.385573773902666, DC0:basete troenpy=-0.28754826515117116\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35858\n",
      "fle max=-1.532922927912833\n",
      "normalizing bias features ...\n",
      "number of ebias features =35858\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "(0.1522625, 0.008642997526900021)\n"
     ]
    }
   ],
   "source": [
    "# Run repeatSampling_knn on amazonfn with Nsample=50 and keep the result under\n",
    "# both names bound by this cell (dfamazon and the generic alias df).\n",
    "df = dfamazon = repeatSampling_knn(amazonfn, Nsample=50)\n",
    "# Print (mean, std) of the result's `nege` attribute; what `nege` measures is not\n",
    "# shown in this cell -- presumably a per-sample error score, TODO confirm.\n",
    "print((np.mean(dfamazon.nege), np.std(dfamazon.nege)))\n",
    "#dftw=repeatSampling_knn_ncf(twfn, Nsample=30)\n",
    "#dfclass=repeatSampling_knn_ncf(classfn, Nsample=30)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1a26afa8",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "8095f10f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " ... 0 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5371,)\n",
      "len of dfs: 5371\n",
      "the vocabulary size =:5371\n",
      "time passed:9667.587869167328\n",
      "DC0:base entropy=-0.8375131770110071, DC0:basete troenpy=-0.8491497771868417\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =5371\n",
      "fle max=-1.0977945796607615\n",
      "normalizing bias features ...\n",
      "number of ebias features =5371\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 1 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5351,)\n",
      "len of dfs: 5351\n",
      "the vocabulary size =:5351\n",
      "time passed:9670.236027002335\n",
      "DC0:base entropy=-0.8393072611877346, DC0:basete troenpy=-0.8455522470590201\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =5351\n",
      "fle max=-1.0670438378714362\n",
      "normalizing bias features ...\n",
      "number of ebias features =5351\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 2 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5339,)\n",
      "len of dfs: 5339\n",
      "the vocabulary size =:5339\n",
      "time passed:9672.865852117538\n",
      "DC0:base entropy=-0.8334563400921018, DC0:basete troenpy=-0.8575640699283711\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =5339\n",
      "fle max=-1.0600401061158897\n",
      "normalizing bias features ...\n",
      "number of ebias features =5339\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 3 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5307,)\n",
      "len of dfs: 5307\n",
      "the vocabulary size =:5307\n",
      "time passed:9675.487884044647\n",
      "DC0:base entropy=-0.8315784213010521, DC0:basete troenpy=-0.8612573763541873\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =5307\n",
      "fle max=-1.0441308826984412\n",
      "normalizing bias features ...\n",
      "number of ebias features =5307\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 4 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5303,)\n",
      "len of dfs: 5303\n",
      "the vocabulary size =:5303\n",
      "time passed:9678.089507102966\n",
      "DC0:base entropy=-0.8281073470590256, DC0:basete troenpy=-0.8685875694880588\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =5303\n",
      "fle max=-1.0452026206368652\n",
      "normalizing bias features ...\n",
      "number of ebias features =5303\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 5 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5327,)\n",
      "len of dfs: 5327\n",
      "the vocabulary size =:5327\n",
      "time passed:9680.725527048111\n",
      "DC0:base entropy=-0.8356734982120457, DC0:basete troenpy=-0.8527841382461514\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =5327\n",
      "fle max=-1.081762032244942\n",
      "normalizing bias features ...\n",
      "number of ebias features =5327\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 6 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5322,)\n",
      "len of dfs: 5322\n",
      "the vocabulary size =:5322\n",
      "time passed:9683.419999837875\n",
      "DC0:base entropy=-0.8290144522797364, DC0:basete troenpy=-0.8662346189691629\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =5322\n",
      "fle max=-1.0751029863126327\n",
      "normalizing bias features ...\n",
      "number of ebias features =5322\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 7 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5298,)\n",
      "len of dfs: 5298\n",
      "the vocabulary size =:5298\n",
      "time passed:9686.056192874908\n",
      "DC0:base entropy=-0.8400379859980786, DC0:basete troenpy=-0.8433543231712265\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =5298\n",
      "fle max=-1.0861265200309749\n",
      "normalizing bias features ...\n",
      "number of ebias features =5298\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 8 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5321,)\n",
      "len of dfs: 5321\n",
      "the vocabulary size =:5321\n",
      "time passed:9688.660573244095\n",
      "DC0:base entropy=-0.8333598560214979, DC0:basete troenpy=-0.8576027687804949\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =5321\n",
      "fle max=-1.0870170329205582\n",
      "normalizing bias features ...\n",
      "number of ebias features =5321\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 9 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5303,)\n",
      "len of dfs: 5303\n",
      "the vocabulary size =:5303\n",
      "time passed:9691.31122303009\n",
      "DC0:base entropy=-0.8345553442435768, DC0:basete troenpy=-0.8551736876960938\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =5303\n",
      "fle max=-1.0622919209272785\n",
      "normalizing bias features ...\n",
      "number of ebias features =5303\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 10 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5221,)\n",
      "len of dfs: 5221\n",
      "the vocabulary size =:5221\n",
      "time passed:9693.957643032074\n",
      "DC0:base entropy=-0.8338634107830257, DC0:basete troenpy=-0.8564250200487029\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =5221\n",
      "fle max=-1.0098816981192165\n",
      "normalizing bias features ...\n",
      "number of ebias features =5221\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 11 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5299,)\n",
      "len of dfs: 5299\n",
      "the vocabulary size =:5299\n",
      "time passed:9696.52822303772\n",
      "DC0:base entropy=-0.8316117504672278, DC0:basete troenpy=-0.8612440752035094\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =5299\n",
      "fle max=-1.0593483271509294\n",
      "normalizing bias features ...\n",
      "number of ebias features =5299\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 12 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5304,)\n",
      "len of dfs: 5304\n",
      "the vocabulary size =:5304\n",
      "time passed:9699.163307905197\n",
      "DC0:base entropy=-0.8305560343317595, DC0:basete troenpy=-0.8626504755231973\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =5304\n",
      "fle max=-1.0528300846058045\n",
      "normalizing bias features ...\n",
      "number of ebias features =5304\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 13 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5370,)\n",
      "len of dfs: 5370\n",
      "the vocabulary size =:5370\n",
      "time passed:9701.774261951447\n",
      "DC0:base entropy=-0.816094761297988, DC0:basete troenpy=-0.8927129252591434\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =5370\n",
      "fle max=-1.0763761639477427\n",
      "normalizing bias features ...\n",
      "number of ebias features =5370\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 14 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5295,)\n",
      "len of dfs: 5295\n",
      "the vocabulary size =:5295\n",
      "time passed:9704.4484000206\n",
      "DC0:base entropy=-0.828547738578384, DC0:basete troenpy=-0.8674151461587197\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =5295\n",
      "fle max=-1.0772244243444464\n",
      "normalizing bias features ...\n",
      "number of ebias features =5295\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 15 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5292,)\n",
      "len of dfs: 5292\n",
      "the vocabulary size =:5292\n",
      "time passed:9707.092091083527\n",
      "DC0:base entropy=-0.8351776865358511, DC0:basete troenpy=-0.8529841784121454\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =5292\n",
      "fle max=-1.0629142632195527\n",
      "normalizing bias features ...\n",
      "number of ebias features =5292\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 16 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5323,)\n",
      "len of dfs: 5323\n",
      "the vocabulary size =:5323\n",
      "time passed:9709.695252895355\n",
      "DC0:base entropy=-0.8344808259895734, DC0:basete troenpy=-0.8552036704388003\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =5323\n",
      "fle max=-1.0622174026732751\n",
      "normalizing bias features ...\n",
      "number of ebias features =5323\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 17 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5291,)\n",
      "len of dfs: 5291\n",
      "the vocabulary size =:5291\n",
      "time passed:9712.322504997253\n",
      "DC0:base entropy=-0.8398630746896187, DC0:basete troenpy=-0.8434257994108822\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =5291\n",
      "fle max=-1.085951608722515\n",
      "normalizing bias features ...\n",
      "number of ebias features =5291\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 18 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5308,)\n",
      "len of dfs: 5308\n",
      "the vocabulary size =:5308\n",
      "time passed:9714.921823978424\n",
      "DC0:base entropy=-0.8342135960716537, DC0:basete troenpy=-0.856284321784278\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =5308\n",
      "fle max=-1.133534510314302\n",
      "normalizing bias features ...\n",
      "number of ebias features =5308\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 19 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5293,)\n",
      "len of dfs: 5293\n",
      "the vocabulary size =:5293\n",
      "time passed:9717.546576023102\n",
      "DC0:base entropy=-0.8439758317654371, DC0:basete troenpy=-0.8360954551352564\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =5293\n",
      "fle max=-1.0387323467331497\n",
      "normalizing bias features ...\n",
      "number of ebias features =5293\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 20 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5291,)\n",
      "len of dfs: 5291\n",
      "the vocabulary size =:5291\n",
      "time passed:9720.14472913742\n",
      "DC0:base entropy=-0.8417304757434234, DC0:basete troenpy=-0.8407711537542254\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =5291\n",
      "fle max=-1.1020118783931778\n",
      "normalizing bias features ...\n",
      "number of ebias features =5291\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 21 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5275,)\n",
      "len of dfs: 5275\n",
      "the vocabulary size =:5275\n",
      "time passed:9722.727931022644\n",
      "DC0:base entropy=-0.8279247121040709, DC0:basete troenpy=-0.868659735630112\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =5275\n",
      "fle max=-1.067539238607366\n",
      "normalizing bias features ...\n",
      "number of ebias features =5275\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 22 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5268,)\n",
      "len of dfs: 5268\n",
      "the vocabulary size =:5268\n",
      "time passed:9725.355850934982\n",
      "DC0:base entropy=-0.8446129395850004, DC0:basete troenpy=-0.8349002708880137\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =5268\n",
      "fle max=-1.1008445093198735\n",
      "normalizing bias features ...\n",
      "number of ebias features =5268\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 23 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5320,)\n",
      "len of dfs: 5320\n",
      "the vocabulary size =:5320\n",
      "time passed:9727.968014001846\n",
      "DC0:base entropy=-0.8381193689912496, DC0:basete troenpy=-0.8469893551797102\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =5320\n",
      "fle max=-1.0842079030241458\n",
      "normalizing bias features ...\n",
      "number of ebias features =5320\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 24 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5308,)\n",
      "len of dfs: 5308\n",
      "the vocabulary size =:5308\n",
      "time passed:9730.59493303299\n",
      "DC0:base entropy=-0.8306155635600097, DC0:basete troenpy=-0.863614920327068\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =5308\n",
      "fle max=-1.0767040975929059\n",
      "normalizing bias features ...\n",
      "number of ebias features =5308\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 25 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5256,)\n",
      "len of dfs: 5256\n",
      "the vocabulary size =:5256\n",
      "time passed:9733.218064069748\n",
      "DC0:base entropy=-0.8385812610368972, DC0:basete troenpy=-0.8468014719352122\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =5256\n",
      "fle max=-1.0608553113109422\n",
      "normalizing bias features ...\n",
      "number of ebias features =5256\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 26 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5312,)\n",
      "len of dfs: 5312\n",
      "the vocabulary size =:5312\n",
      "time passed:9735.889122009277\n",
      "DC0:base entropy=-0.8366901372794362, DC0:basete troenpy=-0.8504441580109864\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =5312\n",
      "fle max=-1.0052226935474335\n",
      "normalizing bias features ...\n",
      "number of ebias features =5312\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 27 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5341,)\n",
      "len of dfs: 5341\n",
      "the vocabulary size =:5341\n",
      "time passed:9738.654354095459\n",
      "DC0:base entropy=-0.8379234823848609, DC0:basete troenpy=-0.8480248343379321\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =5341\n",
      "fle max=-1.071431014331085\n",
      "normalizing bias features ...\n",
      "number of ebias features =5341\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 28 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5287,)\n",
      "len of dfs: 5287\n",
      "the vocabulary size =:5287\n",
      "time passed:9741.280039072037\n",
      "DC0:base entropy=-0.8392335043938186, DC0:basete troenpy=-0.8455823178998466\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =5287\n",
      "fle max=-1.0995149070435732\n",
      "normalizing bias features ...\n",
      "number of ebias features =5287\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 29 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5288,)\n",
      "len of dfs: 5288\n",
      "the vocabulary size =:5288\n",
      "time passed:9743.87276315689\n",
      "DC0:base entropy=-0.8415918750759293, DC0:basete troenpy=-0.8408280374695181\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =5288\n",
      "fle max=-1.0638659253499743\n",
      "normalizing bias features ...\n",
      "number of ebias features =5288\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 30 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5273,)\n",
      "len of dfs: 5273\n",
      "the vocabulary size =:5273\n",
      "time passed:9746.459988832474\n",
      "DC0:base entropy=-0.8267483424727414, DC0:basete troenpy=-0.8711278228587255\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =5273\n",
      "fle max=-1.087029745122496\n",
      "normalizing bias features ...\n",
      "number of ebias features =5273\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 31 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5301,)\n",
      "len of dfs: 5301\n",
      "the vocabulary size =:5301\n",
      "time passed:9749.05085682869\n",
      "DC0:base entropy=-0.8402424784144191, DC0:basete troenpy=-0.8432707276750976\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =5301\n",
      "fle max=-1.0668262444382068\n",
      "normalizing bias features ...\n",
      "number of ebias features =5301\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 32 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5332,)\n",
      "len of dfs: 5332\n",
      "the vocabulary size =:5332\n",
      "time passed:9751.658185005188\n",
      "DC0:base entropy=-0.8329861128665061, DC0:basete troenpy=-0.8587307346753855\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =5332\n",
      "fle max=-1.0859505078545895\n",
      "normalizing bias features ...\n",
      "number of ebias features =5332\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 33 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5340,)\n",
      "len of dfs: 5340\n",
      "the vocabulary size =:5340\n",
      "time passed:9754.272056102753\n",
      "DC0:base entropy=-0.8409429300120191, DC0:basete troenpy=-0.842037882106244\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =5340\n",
      "fle max=-1.0556989842939013\n",
      "normalizing bias features ...\n",
      "number of ebias features =5340\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 34 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5278,)\n",
      "len of dfs: 5278\n",
      "the vocabulary size =:5278\n",
      "time passed:9756.907911777496\n",
      "DC0:base entropy=-0.8296392154556897, DC0:basete troenpy=-0.864993653134547\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =5278\n",
      "fle max=-1.134904018646223\n",
      "normalizing bias features ...\n",
      "number of ebias features =5278\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 35 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5357,)\n",
      "len of dfs: 5357\n",
      "the vocabulary size =:5357\n",
      "time passed:9759.488232135773\n",
      "DC0:base entropy=-0.847396403835061, DC0:basete troenpy=-0.8291208008110766\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =5357\n",
      "fle max=-1.0870109303383562\n",
      "normalizing bias features ...\n",
      "number of ebias features =5357\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 36 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5295,)\n",
      "len of dfs: 5295\n",
      "the vocabulary size =:5295\n",
      "time passed:9762.107959985733\n",
      "DC0:base entropy=-0.8327056571365951, DC0:basete troenpy=-0.8588430444391656\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =5295\n",
      "fle max=-1.0787941911694912\n",
      "normalizing bias features ...\n",
      "number of ebias features =5295\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 37 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5238,)\n",
      "len of dfs: 5238\n",
      "the vocabulary size =:5238\n",
      "time passed:9764.695636987686\n",
      "DC0:base entropy=-0.8258705064544194, DC0:basete troenpy=-0.8724789572836605\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =5238\n",
      "fle max=-1.0788349014425027\n",
      "normalizing bias features ...\n",
      "number of ebias features =5238\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 38 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5289,)\n",
      "len of dfs: 5289\n",
      "the vocabulary size =:5289\n",
      "time passed:9767.302859067917\n",
      "DC0:base entropy=-0.8345553442435768, DC0:basete troenpy=-0.8551736876960938\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =5289\n",
      "fle max=-1.0515663633968177\n",
      "normalizing bias features ...\n",
      "number of ebias features =5289\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 39 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5373,)\n",
      "len of dfs: 5373\n",
      "the vocabulary size =:5373\n",
      "time passed:9769.981216907501\n",
      "DC0:base entropy=-0.8274632692384964, DC0:basete troenpy=-0.8698427007012903\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =5373\n",
      "fle max=-1.0890219585034902\n",
      "normalizing bias features ...\n",
      "number of ebias features =5373\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 40 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5344,)\n",
      "len of dfs: 5344\n",
      "the vocabulary size =:5344\n",
      "time passed:9772.620245933533\n",
      "DC0:base entropy=-0.8308012806718641, DC0:basete troenpy=-0.8625528263075752\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =5344\n",
      "fle max=-1.0704158071751593\n",
      "normalizing bias features ...\n",
      "number of ebias features =5344\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 41 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5290,)\n",
      "len of dfs: 5290\n",
      "the vocabulary size =:5290\n",
      "time passed:9775.264142036438\n",
      "DC0:base entropy=-0.8363931920225852, DC0:basete troenpy=-0.8515275939697678\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =5290\n",
      "fle max=-1.0760077185258805\n",
      "normalizing bias features ...\n",
      "number of ebias features =5290\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 42 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5294,)\n",
      "len of dfs: 5294\n",
      "the vocabulary size =:5294\n",
      "time passed:9777.89524102211\n",
      "DC0:base entropy=-0.8432926528305829, DC0:basete troenpy=-0.8373115634870756\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =5294\n",
      "fle max=-1.0969498297296432\n",
      "normalizing bias features ...\n",
      "number of ebias features =5294\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 43 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5275,)\n",
      "len of dfs: 5275\n",
      "the vocabulary size =:5275\n",
      "time passed:9780.468204021454\n",
      "DC0:base entropy=-0.8347053796251818, DC0:basete troenpy=-0.8551133068110387\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =5275\n",
      "fle max=-1.0949867822749364\n",
      "normalizing bias features ...\n",
      "number of ebias features =5275\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 44 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5303,)\n",
      "len of dfs: 5303\n",
      "the vocabulary size =:5303\n",
      "time passed:9783.064321041107\n",
      "DC0:base entropy=-0.828141820781002, DC0:basete troenpy=-0.8675757188464942\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =5303\n",
      "fle max=-1.0811062157690854\n",
      "normalizing bias features ...\n",
      "number of ebias features =5303\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 45 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5305,)\n",
      "len of dfs: 5305\n",
      "the vocabulary size =:5305\n",
      "time passed:9785.698992013931\n",
      "DC0:base entropy=-0.8363931920225852, DC0:basete troenpy=-0.8515275939697678\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =5305\n",
      "fle max=-1.1128156662707853\n",
      "normalizing bias features ...\n",
      "number of ebias features =5305\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 46 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5293,)\n",
      "len of dfs: 5293\n",
      "the vocabulary size =:5293\n",
      "time passed:9788.788754940033\n",
      "DC0:base entropy=-0.8413285971285098, DC0:basete troenpy=-0.841879838471296\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =5293\n",
      "fle max=-1.0606478385216698\n",
      "normalizing bias features ...\n",
      "number of ebias features =5293\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 47 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5246,)\n",
      "len of dfs: 5246\n",
      "the vocabulary size =:5246\n",
      "time passed:9791.421502828598\n",
      "DC0:base entropy=-0.8318627043731625, DC0:basete troenpy=-0.8601607573924706\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =5246\n",
      "fle max=-1.059599281056864\n",
      "normalizing bias features ...\n",
      "number of ebias features =5246\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 48 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5286,)\n",
      "len of dfs: 5286\n",
      "the vocabulary size =:5286\n",
      "time passed:9794.02049612999\n",
      "DC0:base entropy=-0.8457456350689994, DC0:basete troenpy=-0.8325744146944318\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =5286\n",
      "fle max=-1.104610341063112\n",
      "normalizing bias features ...\n",
      "number of ebias features =5286\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 49 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_twitter_by_line.txt\n",
      "\"negative\"--->0\n",
      "\"neutral\"--->1\n",
      "\"positive\"--->2\n",
      "3115\n",
      "623\n",
      "2492\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 2492 docs.\n",
      "shape of dfs:(5324,)\n",
      "len of dfs: 5324\n",
      "the vocabulary size =:5324\n",
      "time passed:9796.647390842438\n",
      "DC0:base entropy=-0.8352934229945644, DC0:basete troenpy=-0.8529375010250706\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =5324\n",
      "fle max=-1.0813819570274608\n",
      "normalizing bias features ...\n",
      "number of ebias features =5324\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "(0.29691813804173356, 0.020248883625192678)\n"
     ]
    }
   ],
   "source": [
    "# Repeat the KNN sampling experiment on the Twitter dataset (50 resamplings).\n",
    "dftw = repeatSampling_knn(twfn, Nsample=50)\n",
    "# Keep the generic `df` alias bound to the same result object.\n",
    "df = dftw\n",
    "# Report (mean, std) of the `nege` column; np.std defaults to ddof=0.\n",
    "print((np.mean(dftw.nege), np.std(dftw.nege)))\n",
    "# Disabled alternative runs, kept for reference:\n",
    "#dftw=repeatSampling_knn_ncf(twfn, Nsample=30)\n",
    "#dfclass=repeatSampling_knn_ncf(classfn, Nsample=30)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c67cd8fd",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "ac07d320",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " ... 0 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22609,)\n",
      "len of dfs: 22609\n",
      "the vocabulary size =:22609\n",
      "time passed:9799.680903196335\n",
      "DC0:base entropy=-1.2865673904776096, DC0:basete troenpy=-0.3818871274829971\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22609\n",
      "fle max=-1.3894275081701366\n",
      "normalizing bias features ...\n",
      "number of ebias features =22609\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 1 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22559,)\n",
      "len of dfs: 22559\n",
      "the vocabulary size =:22559\n",
      "time passed:9828.642216205597\n",
      "DC0:base entropy=-1.2878425388909513, DC0:basete troenpy=-0.3811985294049629\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22559\n",
      "fle max=-1.5521958823070212\n",
      "normalizing bias features ...\n",
      "number of ebias features =22559\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 2 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22467,)\n",
      "len of dfs: 22467\n",
      "the vocabulary size =:22467\n",
      "time passed:9857.478463888168\n",
      "DC0:base entropy=-1.2850472062148794, DC0:basete troenpy=-0.3837915604914456\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22467\n",
      "fle max=-1.4165154035275542\n",
      "normalizing bias features ...\n",
      "number of ebias features =22467\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 3 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22561,)\n",
      "len of dfs: 22561\n",
      "the vocabulary size =:22561\n",
      "time passed:9886.291408061981\n",
      "DC0:base entropy=-1.2850036669510838, DC0:basete troenpy=-0.3837067501170595\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22561\n",
      "fle max=-1.4159248546849437\n",
      "normalizing bias features ...\n",
      "number of ebias features =22561\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 4 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22549,)\n",
      "len of dfs: 22549\n",
      "the vocabulary size =:22549\n",
      "time passed:9915.010460138321\n",
      "DC0:base entropy=-1.2826539826598076, DC0:basete troenpy=-0.38612314245628976\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22549\n",
      "fle max=-1.4109129773624136\n",
      "normalizing bias features ...\n",
      "number of ebias features =22549\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 5 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22343,)\n",
      "len of dfs: 22343\n",
      "the vocabulary size =:22343\n",
      "time passed:9943.666085004807\n",
      "DC0:base entropy=-1.2827159352525133, DC0:basete troenpy=-0.3856827281791073\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22343\n",
      "fle max=-1.3643415615510674\n",
      "normalizing bias features ...\n",
      "number of ebias features =22343\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 6 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22598,)\n",
      "len of dfs: 22598\n",
      "the vocabulary size =:22598\n",
      "time passed:9972.190040111542\n",
      "DC0:base entropy=-1.2843781059074026, DC0:basete troenpy=-0.38419543137182016\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22598\n",
      "fle max=-1.4136876764501642\n",
      "normalizing bias features ...\n",
      "number of ebias features =22598\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 7 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22455,)\n",
      "len of dfs: 22455\n",
      "the vocabulary size =:22455\n",
      "time passed:10001.002354860306\n",
      "DC0:base entropy=-1.2851346237215195, DC0:basete troenpy=-0.38354711859103124\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22455\n",
      "fle max=-1.3955891085421546\n",
      "normalizing bias features ...\n",
      "number of ebias features =22455\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 8 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22413,)\n",
      "len of dfs: 22413\n",
      "the vocabulary size =:22413\n",
      "time passed:10029.65792298317\n",
      "DC0:base entropy=-1.2831998666588933, DC0:basete troenpy=-0.3849352266220339\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22413\n",
      "fle max=-1.4112081059487924\n",
      "normalizing bias features ...\n",
      "number of ebias features =22413\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 9 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22413,)\n",
      "len of dfs: 22413\n",
      "the vocabulary size =:22413\n",
      "time passed:10058.531461000443\n",
      "DC0:base entropy=-1.284246872838008, DC0:basete troenpy=-0.3848648871168947\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22413\n",
      "fle max=-1.4275047838852422\n",
      "normalizing bias features ...\n",
      "number of ebias features =22413\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 10 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22341,)\n",
      "len of dfs: 22341\n",
      "the vocabulary size =:22341\n",
      "time passed:10087.323899030685\n",
      "DC0:base entropy=-1.2846587342765607, DC0:basete troenpy=-0.3843668715037444\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22341\n",
      "fle max=-1.4189387053223839\n",
      "normalizing bias features ...\n",
      "number of ebias features =22341\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 11 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "shape of dfs:(22489,)\n",
      "len of dfs: 22489\n",
      "the vocabulary size =:22489\n",
      "time passed:10116.05643582344\n",
      "DC0:base entropy=-1.2865755796875011, DC0:basete troenpy=-0.38197858462589324\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22489\n",
      "fle max=-1.5688037835360362\n",
      "normalizing bias features ...\n",
      "number of ebias features =22489\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 12 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22473,)\n",
      "len of dfs: 22473\n",
      "the vocabulary size =:22473\n",
      "time passed:10144.89321899414\n",
      "DC0:base entropy=-1.285885086549323, DC0:basete troenpy=-0.3826961763737822\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22473\n",
      "fle max=-1.5757654734429392\n",
      "normalizing bias features ...\n",
      "number of ebias features =22473\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 13 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22556,)\n",
      "len of dfs: 22556\n",
      "the vocabulary size =:22556\n",
      "time passed:10174.115260124207\n",
      "DC0:base entropy=-1.283829339096649, DC0:basete troenpy=-0.38515278981882534\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22556\n",
      "fle max=-1.3661476818314493\n",
      "normalizing bias features ...\n",
      "number of ebias features =22556\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 14 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22482,)\n",
      "len of dfs: 22482\n",
      "the vocabulary size =:22482\n",
      "time passed:10202.768350839615\n",
      "DC0:base entropy=-1.2848647099763668, DC0:basete troenpy=-0.3835646702089468\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22482\n",
      "fle max=-1.4157858977102267\n",
      "normalizing bias features ...\n",
      "number of ebias features =22482\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 15 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22505,)\n",
      "len of dfs: 22505\n",
      "the vocabulary size =:22505\n",
      "time passed:10231.464859008789\n",
      "DC0:base entropy=-1.2880464092841917, DC0:basete troenpy=-0.3802317183150829\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22505\n",
      "fle max=-1.3947797012697922\n",
      "normalizing bias features ...\n",
      "number of ebias features =22505\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 16 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22547,)\n",
      "len of dfs: 22547\n",
      "the vocabulary size =:22547\n",
      "time passed:10260.342450141907\n",
      "DC0:base entropy=-1.2844344037985422, DC0:basete troenpy=-0.3845857363423319\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22547\n",
      "fle max=-1.391167695784143\n",
      "normalizing bias features ...\n",
      "number of ebias features =22547\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 17 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22384,)\n",
      "len of dfs: 22384\n",
      "the vocabulary size =:22384\n",
      "time passed:10289.489977121353\n",
      "DC0:base entropy=-1.2828957594923538, DC0:basete troenpy=-0.3858990972983653\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22384\n",
      "fle max=-1.3928696444583786\n",
      "normalizing bias features ...\n",
      "number of ebias features =22384\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 18 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22593,)\n",
      "len of dfs: 22593\n",
      "the vocabulary size =:22593\n",
      "time passed:10317.971982955933\n",
      "DC0:base entropy=-1.286796818264845, DC0:basete troenpy=-0.3815957356025881\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22593\n",
      "fle max=-1.415055812967451\n",
      "normalizing bias features ...\n",
      "number of ebias features =22593\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 19 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22652,)\n",
      "len of dfs: 22652\n",
      "the vocabulary size =:22652\n",
      "time passed:10346.665647029877\n",
      "DC0:base entropy=-1.2864353191942697, DC0:basete troenpy=-0.38284274583462097\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22652\n",
      "fle max=-1.5213937556652526\n",
      "normalizing bias features ...\n",
      "number of ebias features =22652\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 20 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22645,)\n",
      "len of dfs: 22645\n",
      "the vocabulary size =:22645\n",
      "time passed:10375.595837831497\n",
      "DC0:base entropy=-1.2862592601369252, DC0:basete troenpy=-0.38302771265167795\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22645\n",
      "fle max=-1.3925465187810144\n",
      "normalizing bias features ...\n",
      "number of ebias features =22645\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 21 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22597,)\n",
      "len of dfs: 22597\n",
      "the vocabulary size =:22597\n",
      "time passed:10405.074810028076\n",
      "DC0:base entropy=-1.2835400606547487, DC0:basete troenpy=-0.38560062684577273\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22597\n",
      "fle max=-1.3655104500998465\n",
      "normalizing bias features ...\n",
      "number of ebias features =22597\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 22 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22478,)\n",
      "len of dfs: 22478\n",
      "the vocabulary size =:22478\n",
      "time passed:10433.881620883942\n",
      "DC0:base entropy=-1.2884056626509766, DC0:basete troenpy=-0.3798806903871535\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =22478\n",
      "fle max=-1.4834154145650726\n",
      "normalizing bias features ...\n",
      "number of ebias features =22478\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 23 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22437,)\n",
      "len of dfs: 22437\n",
      "the vocabulary size =:22437\n",
      "time passed:10462.469831228256\n",
      "DC0:base entropy=-1.2858779171186023, DC0:basete troenpy=-0.3826099129835335\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22437\n",
      "fle max=-1.3681962598534025\n",
      "normalizing bias features ...\n",
      "number of ebias features =22437\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 24 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22490,)\n",
      "len of dfs: 22490\n",
      "the vocabulary size =:22490\n",
      "time passed:10490.907762050629\n",
      "DC0:base entropy=-1.2876991063895247, DC0:basete troenpy=-0.3812586127116918\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22490\n",
      "fle max=-1.4167715910883398\n",
      "normalizing bias features ...\n",
      "number of ebias features =22490\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 25 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22617,)\n",
      "len of dfs: 22617\n",
      "the vocabulary size =:22617\n",
      "time passed:10519.585230112076\n",
      "DC0:base entropy=-1.2834744755816205, DC0:basete troenpy=-0.3858295213368162\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22617\n",
      "fle max=-1.433816874977538\n",
      "normalizing bias features ...\n",
      "number of ebias features =22617\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 26 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22519,)\n",
      "len of dfs: 22519\n",
      "the vocabulary size =:22519\n",
      "time passed:10549.167263031006\n",
      "DC0:base entropy=-1.2845457423399669, DC0:basete troenpy=-0.38421720860835396\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22519\n",
      "fle max=-1.421772335659791\n",
      "normalizing bias features ...\n",
      "number of ebias features =22519\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 27 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22459,)\n",
      "len of dfs: 22459\n",
      "the vocabulary size =:22459\n",
      "time passed:10577.698096036911\n",
      "DC0:base entropy=-1.2799307076706827, DC0:basete troenpy=-0.3894265290178103\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22459\n",
      "fle max=-1.387568444553614\n",
      "normalizing bias features ...\n",
      "number of ebias features =22459\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 28 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22568,)\n",
      "len of dfs: 22568\n",
      "the vocabulary size =:22568\n",
      "time passed:10606.320943117142\n",
      "DC0:base entropy=-1.2872112854916786, DC0:basete troenpy=-0.38150825032951396\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22568\n",
      "fle max=-1.388847437530927\n",
      "normalizing bias features ...\n",
      "number of ebias features =22568\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 29 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22564,)\n",
      "len of dfs: 22564\n",
      "the vocabulary size =:22564\n",
      "time passed:10635.244228124619\n",
      "DC0:base entropy=-1.2882904794186265, DC0:basete troenpy=-0.3808021872772718\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22564\n",
      "fle max=-1.4147378248645222\n",
      "normalizing bias features ...\n",
      "number of ebias features =22564\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 30 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22355,)\n",
      "len of dfs: 22355\n",
      "the vocabulary size =:22355\n",
      "time passed:10664.29602098465\n",
      "DC0:base entropy=-1.280659336489677, DC0:basete troenpy=-0.387533857747938\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22355\n",
      "fle max=-1.3865046104889256\n",
      "normalizing bias features ...\n",
      "number of ebias features =22355\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 31 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22491,)\n",
      "len of dfs: 22491\n",
      "the vocabulary size =:22491\n",
      "time passed:10692.754914999008\n",
      "DC0:base entropy=-1.284708964289186, DC0:basete troenpy=-0.38364028326277916\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22491\n",
      "fle max=-1.393268048625588\n",
      "normalizing bias features ...\n",
      "number of ebias features =22491\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 32 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22614,)\n",
      "len of dfs: 22614\n",
      "the vocabulary size =:22614\n",
      "time passed:10721.640158891678\n",
      "DC0:base entropy=-1.289045438429135, DC0:basete troenpy=-0.3792809462738046\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22614\n",
      "fle max=-1.3944527198258867\n",
      "normalizing bias features ...\n",
      "number of ebias features =22614\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 33 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22519,)\n",
      "len of dfs: 22519\n",
      "the vocabulary size =:22519\n",
      "time passed:10750.245276212692\n",
      "DC0:base entropy=-1.2833904552626003, DC0:basete troenpy=-0.3855567153236714\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22519\n",
      "fle max=-1.388797736659352\n",
      "normalizing bias features ...\n",
      "number of ebias features =22519\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " ... 34 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22495,)\n",
      "len of dfs: 22495\n",
      "the vocabulary size =:22495\n",
      "time passed:10779.172333955765\n",
      "DC0:base entropy=-1.2812073513974742, DC0:basete troenpy=-0.3880380548377516\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22495\n",
      "fle max=-1.3879406433830748\n",
      "normalizing bias features ...\n",
      "number of ebias features =22495\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 35 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22609,)\n",
      "len of dfs: 22609\n",
      "the vocabulary size =:22609\n",
      "time passed:10807.645421981812\n",
      "DC0:base entropy=-1.2908573845686122, DC0:basete troenpy=-0.37756047738226595\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22609\n",
      "fle max=-1.437563894106701\n",
      "normalizing bias features ...\n",
      "number of ebias features =22609\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 36 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22685,)\n",
      "len of dfs: 22685\n",
      "the vocabulary size =:22685\n",
      "time passed:10836.449330806732\n",
      "DC0:base entropy=-1.2851660355130234, DC0:basete troenpy=-0.3838657371657548\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22685\n",
      "fle max=-1.4153223137531348\n",
      "normalizing bias features ...\n",
      "number of ebias features =22685\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 37 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22535,)\n",
      "len of dfs: 22535\n",
      "the vocabulary size =:22535\n",
      "time passed:10865.188750982285\n",
      "DC0:base entropy=-1.2869003989838608, DC0:basete troenpy=-0.3815298653883322\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22535\n",
      "fle max=-1.392745672983109\n",
      "normalizing bias features ...\n",
      "number of ebias features =22535\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 38 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22512,)\n",
      "len of dfs: 22512\n",
      "the vocabulary size =:22512\n",
      "time passed:10893.857908964157\n",
      "DC0:base entropy=-1.2839497735794714, DC0:basete troenpy=-0.3846894483800883\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22512\n",
      "fle max=-1.3911332053535934\n",
      "normalizing bias features ...\n",
      "number of ebias features =22512\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 39 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22490,)\n",
      "len of dfs: 22490\n",
      "the vocabulary size =:22490\n",
      "time passed:10922.667073011398\n",
      "DC0:base entropy=-1.2862645210680834, DC0:basete troenpy=-0.382438259608197\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22490\n",
      "fle max=-1.3957623499336724\n",
      "normalizing bias features ...\n",
      "number of ebias features =22490\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 40 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22320,)\n",
      "len of dfs: 22320\n",
      "the vocabulary size =:22320\n",
      "time passed:10951.424019813538\n",
      "DC0:base entropy=-1.282681652327815, DC0:basete troenpy=-0.38578127949243\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22320\n",
      "fle max=-1.4253819611347271\n",
      "normalizing bias features ...\n",
      "number of ebias features =22320\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 41 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22446,)\n",
      "len of dfs: 22446\n",
      "the vocabulary size =:22446\n",
      "time passed:10979.500819921494\n",
      "DC0:base entropy=-1.2850283826820488, DC0:basete troenpy=-0.3840942664624246\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22446\n",
      "fle max=-1.388303672288918\n",
      "normalizing bias features ...\n",
      "number of ebias features =22446\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 42 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22564,)\n",
      "len of dfs: 22564\n",
      "the vocabulary size =:22564\n",
      "time passed:11008.192493915558\n",
      "DC0:base entropy=-1.284571627172482, DC0:basete troenpy=-0.38441614489035625\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22564\n",
      "fle max=-1.4272719359793937\n",
      "normalizing bias features ...\n",
      "number of ebias features =22564\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 43 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22628,)\n",
      "len of dfs: 22628\n",
      "the vocabulary size =:22628\n",
      "time passed:11037.203305006027\n",
      "DC0:base entropy=-1.288874159548161, DC0:basete troenpy=-0.37946074532619123\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22628\n",
      "fle max=-1.3960575913222828\n",
      "normalizing bias features ...\n",
      "number of ebias features =22628\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 44 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22752,)\n",
      "len of dfs: 22752\n",
      "the vocabulary size =:22752\n",
      "time passed:11066.262155056\n",
      "DC0:base entropy=-1.2879353974221608, DC0:basete troenpy=-0.38047213385155665\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22752\n",
      "fle max=-1.4352344654772504\n",
      "normalizing bias features ...\n",
      "number of ebias features =22752\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 45 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "shape of dfs:(22618,)\n",
      "len of dfs: 22618\n",
      "the vocabulary size =:22618\n",
      "time passed:11095.261371135712\n",
      "DC0:base entropy=-1.2863054835812138, DC0:basete troenpy=-0.3823004924182397\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22618\n",
      "fle max=-1.419445106104372\n",
      "normalizing bias features ...\n",
      "number of ebias features =22618\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 46 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22615,)\n",
      "len of dfs: 22615\n",
      "the vocabulary size =:22615\n",
      "time passed:11123.932652950287\n",
      "DC0:base entropy=-1.281783927129932, DC0:basete troenpy=-0.3879733640041642\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22615\n",
      "fle max=-1.3912817559955213\n",
      "normalizing bias features ...\n",
      "number of ebias features =22615\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 47 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22511,)\n",
      "len of dfs: 22511\n",
      "the vocabulary size =:22511\n",
      "time passed:11153.090924024582\n",
      "DC0:base entropy=-1.2856452208598372, DC0:basete troenpy=-0.383241789200025\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22511\n",
      "fle max=-1.4210871054004528\n",
      "normalizing bias features ...\n",
      "number of ebias features =22511\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 48 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22603,)\n",
      "len of dfs: 22603\n",
      "the vocabulary size =:22603\n",
      "time passed:11181.683059930801\n",
      "DC0:base entropy=-1.2869274035693339, DC0:basete troenpy=-0.3821178674479012\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22603\n",
      "fle max=-1.3889680008026892\n",
      "normalizing bias features ...\n",
      "number of ebias features =22603\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 49 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_classic.txt\n",
      "CACM--->0\n",
      "CRAN--->1\n",
      "CISI--->2\n",
      "MED--->3\n",
      "7097\n",
      "1419\n",
      "5678\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 5678 docs.\n",
      "shape of dfs:(22454,)\n",
      "len of dfs: 22454\n",
      "the vocabulary size =:22454\n",
      "time passed:11210.4081761837\n",
      "DC0:base entropy=-1.2906177649097983, DC0:basete troenpy=-0.37767446416925393\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =22454\n",
      "fle max=-1.401557460898074\n",
      "normalizing bias features ...\n",
      "number of ebias features =22454\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "(0.05423537702607471, 0.005077831139907704)\n"
     ]
    }
   ],
   "source": [
    "dfclass=repeatSampling_knn(classfn, Nsample=50)\n",
    "df=dfclass\n",
    "print((np.mean(df.nege), np.std(df.nege)))\n",
    "#dftw=repeatSampling_knn_ncf(twfn, Nsample=30)\n",
    "#dfclass=repeatSampling_knn_ncf(classfn, Nsample=30)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b0c3b1d3",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c99ee884",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a0864642",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c28a1a67",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8637ffc9",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "74adcbdc",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " ... 0 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35840,)\n",
      "len of dfs: 35840\n",
      "the vocabulary size =:35840\n",
      "time passed:14192.51262497902\n",
      "DC0:base entropy=-1.3856566587963985, DC0:basete troenpy=-0.2874837817063782\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35840\n",
      "fle max=2.766501463650167\n",
      "normalizing bias features ...\n",
      "number of ebias features =35840\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35840,)\n",
      "len of dfs: 35840\n",
      "the vocabulary size =:35840\n",
      "time passed:14252.490311145782\n",
      "DC0:base entropy=-1.3856566587963985, DC0:basete troenpy=-0.2874837817063782\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35840\n",
      "fle max=10000.721731582502\n",
      "normalizing bias features ...\n",
      "number of ebias features =35840\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35840,)\n",
      "len of dfs: 35840\n",
      "the vocabulary size =:35840\n",
      "time passed:14317.175718069077\n",
      "DC0:base entropy=-1.3856566587963985, DC0:basete troenpy=-0.2874837817063782\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35840\n",
      "fle max=92.56601064479972\n",
      "normalizing bias features ...\n",
      "number of ebias features =35840\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35840,)\n",
      "len of dfs: 35840\n",
      "the vocabulary size =:35840\n",
      "time passed:14376.388675928116\n",
      "DC0:base entropy=-1.3856566587963985, DC0:basete troenpy=-0.2874837817063782\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35840\n",
      "fle max=4.776120151438517\n",
      "normalizing bias features ...\n",
      "number of ebias features =35840\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35840,)\n",
      "len of dfs: 35840\n",
      "the vocabulary size =:35840\n",
      "time passed:14436.648713111877\n",
      "DC0:base entropy=-1.3856566587963985, DC0:basete troenpy=-0.2874837817063782\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =35840\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 1 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36079,)\n",
      "len of dfs: 36079\n",
      "the vocabulary size =:36079\n",
      "time passed:14497.857902050018\n",
      "DC0:base entropy=-1.3856263898396706, DC0:basete troenpy=-0.2875072089004449\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36079\n",
      "fle max=2.766651935458554\n",
      "normalizing bias features ...\n",
      "number of ebias features =36079\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36079,)\n",
      "len of dfs: 36079\n",
      "the vocabulary size =:36079\n",
      "time passed:14558.691003084183\n",
      "DC0:base entropy=-1.3856263898396706, DC0:basete troenpy=-0.2875072089004449\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36079\n",
      "fle max=10000.72174734984\n",
      "normalizing bias features ...\n",
      "number of ebias features =36079\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36079,)\n",
      "len of dfs: 36079\n",
      "the vocabulary size =:36079\n",
      "time passed:14619.67808008194\n",
      "DC0:base entropy=-1.3856263898396706, DC0:basete troenpy=-0.2875072089004449\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36079\n",
      "fle max=146.2887489844106\n",
      "normalizing bias features ...\n",
      "number of ebias features =36079\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36079,)\n",
      "len of dfs: 36079\n",
      "the vocabulary size =:36079\n",
      "time passed:14679.662498235703\n",
      "DC0:base entropy=-1.3856263898396706, DC0:basete troenpy=-0.2875072089004449\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36079\n",
      "fle max=4.776143578632585\n",
      "normalizing bias features ...\n",
      "number of ebias features =36079\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36079,)\n",
      "len of dfs: 36079\n",
      "the vocabulary size =:36079\n",
      "time passed:14738.772931098938\n",
      "DC0:base entropy=-1.3856263898396706, DC0:basete troenpy=-0.2875072089004449\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =36079\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 2 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35981,)\n",
      "len of dfs: 35981\n",
      "the vocabulary size =:35981\n",
      "time passed:14799.444865226746\n",
      "DC0:base entropy=-1.3856657498853582, DC0:basete troenpy=-0.2874767232229878\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35981\n",
      "fle max=2.767093844060063\n",
      "normalizing bias features ...\n",
      "number of ebias features =35981\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35981,)\n",
      "len of dfs: 35981\n",
      "the vocabulary size =:35981\n",
      "time passed:14858.418954133987\n",
      "DC0:base entropy=-1.3856657498853582, DC0:basete troenpy=-0.2874767232229878\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35981\n",
      "fle max=10000.721726847018\n",
      "normalizing bias features ...\n",
      "number of ebias features =35981\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35981,)\n",
      "len of dfs: 35981\n",
      "the vocabulary size =:35981\n",
      "time passed:14918.127343893051\n",
      "DC0:base entropy=-1.3856657498853582, DC0:basete troenpy=-0.2874767232229878\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35981\n",
      "fle max=84.59861084568868\n",
      "normalizing bias features ...\n",
      "number of ebias features =35981\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35981,)\n",
      "len of dfs: 35981\n",
      "the vocabulary size =:35981\n",
      "time passed:14975.621098041534\n",
      "DC0:base entropy=-1.3856657498853582, DC0:basete troenpy=-0.2874767232229878\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =35981\n",
      "fle max=4.753384841877572\n",
      "normalizing bias features ...\n",
      "number of ebias features =35981\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35981,)\n",
      "len of dfs: 35981\n",
      "the vocabulary size =:35981\n",
      "time passed:15032.272274971008\n",
      "DC0:base entropy=-1.3856657498853582, DC0:basete troenpy=-0.2874767232229878\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =35981\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 3 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36145,)\n",
      "len of dfs: 36145\n",
      "the vocabulary size =:36145\n",
      "time passed:15090.409197092056\n",
      "DC0:base entropy=-1.3856350831511708, DC0:basete troenpy=-0.28750056660355583\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36145\n",
      "fle max=2.767093648294555\n",
      "normalizing bias features ...\n",
      "number of ebias features =36145\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36145,)\n",
      "len of dfs: 36145\n",
      "the vocabulary size =:36145\n",
      "time passed:15147.949645280838\n",
      "DC0:base entropy=-1.3856350831511708, DC0:basete troenpy=-0.28750056660355583\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36145\n",
      "fle max=10000.721742821355\n",
      "normalizing bias features ...\n",
      "number of ebias features =36145\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36145,)\n",
      "len of dfs: 36145\n",
      "the vocabulary size =:36145\n",
      "time passed:15204.91635107994\n",
      "DC0:base entropy=-1.3856350831511708, DC0:basete troenpy=-0.28750056660355583\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36145\n",
      "fle max=90.57435507022096\n",
      "normalizing bias features ...\n",
      "number of ebias features =36145\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36145,)\n",
      "len of dfs: 36145\n",
      "the vocabulary size =:36145\n",
      "time passed:15262.487449884415\n",
      "DC0:base entropy=-1.3856350831511708, DC0:basete troenpy=-0.28750056660355583\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36145\n",
      "fle max=4.764837381081763\n",
      "normalizing bias features ...\n",
      "number of ebias features =36145\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36145,)\n",
      "len of dfs: 36145\n",
      "the vocabulary size =:36145\n",
      "time passed:15320.257099151611\n",
      "DC0:base entropy=-1.3856350831511708, DC0:basete troenpy=-0.28750056660355583\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =36145\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 4 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36009,)\n",
      "len of dfs: 36009\n",
      "the vocabulary size =:36009\n",
      "time passed:15378.93832397461\n",
      "DC0:base entropy=-1.385584341674654, DC0:basete troenpy=-0.2875396099284442\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36009\n",
      "fle max=2.7665657769224143\n",
      "normalizing bias features ...\n",
      "number of ebias features =36009\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36009,)\n",
      "len of dfs: 36009\n",
      "the vocabulary size =:36009\n",
      "time passed:15436.46251296997\n",
      "DC0:base entropy=-1.385584341674654, DC0:basete troenpy=-0.2875396099284442\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36009\n",
      "fle max=10000.721769254204\n",
      "normalizing bias features ...\n",
      "number of ebias features =36009\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36009,)\n",
      "len of dfs: 36009\n",
      "the vocabulary size =:36009\n",
      "time passed:15493.334526062012\n",
      "DC0:base entropy=-1.385584341674654, DC0:basete troenpy=-0.2875396099284442\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36009\n",
      "fle max=78.62150775751279\n",
      "normalizing bias features ...\n",
      "number of ebias features =36009\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36009,)\n",
      "len of dfs: 36009\n",
      "the vocabulary size =:36009\n",
      "time passed:15551.96393418312\n",
      "DC0:base entropy=-1.385584341674654, DC0:basete troenpy=-0.2875396099284442\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36009\n",
      "fle max=4.798399116445294\n",
      "normalizing bias features ...\n",
      "number of ebias features =36009\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36009,)\n",
      "len of dfs: 36009\n",
      "the vocabulary size =:36009\n",
      "time passed:15610.460655927658\n",
      "DC0:base entropy=-1.385584341674654, DC0:basete troenpy=-0.2875396099284442\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =36009\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 5 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35880,)\n",
      "len of dfs: 35880\n",
      "the vocabulary size =:35880\n",
      "time passed:15672.151417970657\n",
      "DC0:base entropy=-1.3856156675634816, DC0:basete troenpy=-0.2875158017511275\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35880\n",
      "fle max=2.7661852868636867\n",
      "normalizing bias features ...\n",
      "number of ebias features =35880\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35880,)\n",
      "len of dfs: 35880\n",
      "the vocabulary size =:35880\n",
      "time passed:15734.779692173004\n",
      "DC0:base entropy=-1.3856156675634816, DC0:basete troenpy=-0.2875158017511275\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35880\n",
      "fle max=10000.721752935324\n",
      "normalizing bias features ...\n",
      "number of ebias features =35880\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35880,)\n",
      "len of dfs: 35880\n",
      "the vocabulary size =:35880\n",
      "time passed:15797.275557041168\n",
      "DC0:base entropy=-1.3856156675634816, DC0:basete troenpy=-0.2875158017511275\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =35880\n",
      "fle max=87.58661715015975\n",
      "normalizing bias features ...\n",
      "number of ebias features =35880\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35880,)\n",
      "len of dfs: 35880\n",
      "the vocabulary size =:35880\n",
      "time passed:15859.078563928604\n",
      "DC0:base entropy=-1.3856156675634816, DC0:basete troenpy=-0.2875158017511275\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35880\n",
      "fle max=4.7648526162293345\n",
      "normalizing bias features ...\n",
      "number of ebias features =35880\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35880,)\n",
      "len of dfs: 35880\n",
      "the vocabulary size =:35880\n",
      "time passed:15921.164756059647\n",
      "DC0:base entropy=-1.3856156675634816, DC0:basete troenpy=-0.2875158017511275\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =35880\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 6 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36066,)\n",
      "len of dfs: 36066\n",
      "the vocabulary size =:36066\n",
      "time passed:15982.82836318016\n",
      "DC0:base entropy=-1.385611292023019, DC0:basete troenpy=-0.28751887987448044\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36066\n",
      "fle max=2.7661800171701936\n",
      "normalizing bias features ...\n",
      "number of ebias features =36066\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36066,)\n",
      "len of dfs: 36066\n",
      "the vocabulary size =:36066\n",
      "time passed:16044.99063205719\n",
      "DC0:base entropy=-1.385611292023019, DC0:basete troenpy=-0.28751887987448044\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36066\n",
      "fle max=10000.721755214669\n",
      "normalizing bias features ...\n",
      "number of ebias features =36066\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36066,)\n",
      "len of dfs: 36066\n",
      "the vocabulary size =:36066\n",
      "time passed:16106.998429059982\n",
      "DC0:base entropy=-1.385611292023019, DC0:basete troenpy=-0.28751887987448044\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36066\n",
      "fle max=140.32433673277657\n",
      "normalizing bias features ...\n",
      "number of ebias features =36066\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36066,)\n",
      "len of dfs: 36066\n",
      "the vocabulary size =:36066\n",
      "time passed:16169.283299922943\n",
      "DC0:base entropy=-1.385611292023019, DC0:basete troenpy=-0.28751887987448044\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36066\n",
      "fle max=4.851867071342316\n",
      "normalizing bias features ...\n",
      "number of ebias features =36066\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36066,)\n",
      "len of dfs: 36066\n",
      "the vocabulary size =:36066\n",
      "time passed:16230.397062063217\n",
      "DC0:base entropy=-1.385611292023019, DC0:basete troenpy=-0.28751887987448044\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =36066\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 7 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36316,)\n",
      "len of dfs: 36316\n",
      "the vocabulary size =:36316\n",
      "time passed:16293.40137720108\n",
      "DC0:base entropy=-1.3856642841244566, DC0:basete troenpy=-0.28747786190228325\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36316\n",
      "fle max=2.7677751973688034\n",
      "normalizing bias features ...\n",
      "number of ebias features =36316\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36316,)\n",
      "len of dfs: 36316\n",
      "the vocabulary size =:36316\n",
      "time passed:16355.450645208359\n",
      "DC0:base entropy=-1.3856642841244566, DC0:basete troenpy=-0.28747786190228325\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36316\n",
      "fle max=10000.721727610518\n",
      "normalizing bias features ...\n",
      "number of ebias features =36316\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36316,)\n",
      "len of dfs: 36316\n",
      "the vocabulary size =:36316\n",
      "time passed:16416.823045015335\n",
      "DC0:base entropy=-1.3856642841244566, DC0:basete troenpy=-0.28747786190228325\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36316\n",
      "fle max=146.28877584014685\n",
      "normalizing bias features ...\n",
      "number of ebias features =36316\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36316,)\n",
      "len of dfs: 36316\n",
      "the vocabulary size =:36316\n",
      "time passed:16478.89357805252\n",
      "DC0:base entropy=-1.3856642841244566, DC0:basete troenpy=-0.28747786190228325\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36316\n",
      "fle max=4.7301291183925995\n",
      "normalizing bias features ...\n",
      "number of ebias features =36316\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36316,)\n",
      "len of dfs: 36316\n",
      "the vocabulary size =:36316\n",
      "time passed:16541.035454034805\n",
      "DC0:base entropy=-1.3856642841244566, DC0:basete troenpy=-0.28747786190228325\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =36316\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 8 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36008,)\n",
      "len of dfs: 36008\n",
      "the vocabulary size =:36008\n",
      "time passed:16603.931754112244\n",
      "DC0:base entropy=-1.3856339110049993, DC0:basete troenpy=-0.28750147789519087\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36008\n",
      "fle max=2.7666650418345355\n",
      "normalizing bias features ...\n",
      "number of ebias features =36008\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36008,)\n",
      "len of dfs: 36008\n",
      "the vocabulary size =:36008\n",
      "time passed:16665.151188135147\n",
      "DC0:base entropy=-1.3856339110049993, DC0:basete troenpy=-0.28750147789519087\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =36008\n",
      "fle max=10000.721743431941\n",
      "normalizing bias features ...\n",
      "number of ebias features =36008\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36008,)\n",
      "len of dfs: 36008\n",
      "the vocabulary size =:36008\n",
      "time passed:16726.563802957535\n",
      "DC0:base entropy=-1.3856339110049993, DC0:basete troenpy=-0.28750147789519087\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36008\n",
      "fle max=144.30075071175568\n",
      "normalizing bias features ...\n",
      "number of ebias features =36008\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36008,)\n",
      "len of dfs: 36008\n",
      "the vocabulary size =:36008\n",
      "time passed:16787.81309390068\n",
      "DC0:base entropy=-1.3856339110049993, DC0:basete troenpy=-0.28750147789519087\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36008\n",
      "fle max=4.830796260165195\n",
      "normalizing bias features ...\n",
      "number of ebias features =36008\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36008,)\n",
      "len of dfs: 36008\n",
      "the vocabulary size =:36008\n",
      "time passed:16849.627588033676\n",
      "DC0:base entropy=-1.3856339110049993, DC0:basete troenpy=-0.28750147789519087\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =36008\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 9 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36438,)\n",
      "len of dfs: 36438\n",
      "the vocabulary size =:36438\n",
      "time passed:16912.310116052628\n",
      "DC0:base entropy=-1.3855941325211383, DC0:basete troenpy=-0.2875324883725894\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36438\n",
      "fle max=2.768007657138044\n",
      "normalizing bias features ...\n",
      "number of ebias features =36438\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36438,)\n",
      "len of dfs: 36438\n",
      "the vocabulary size =:36438\n",
      "time passed:16975.467043161392\n",
      "DC0:base entropy=-1.3855941325211383, DC0:basete troenpy=-0.2875324883725894\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36438\n",
      "fle max=10000.721764153688\n",
      "normalizing bias features ...\n",
      "number of ebias features =36438\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36438,)\n",
      "len of dfs: 36438\n",
      "the vocabulary size =:36438\n",
      "time passed:17038.70561504364\n",
      "DC0:base entropy=-1.3855941325211383, DC0:basete troenpy=-0.2875324883725894\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36438\n",
      "fle max=89.57845311300085\n",
      "normalizing bias features ...\n",
      "number of ebias features =36438\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36438,)\n",
      "len of dfs: 36438\n",
      "the vocabulary size =:36438\n",
      "time passed:17101.59155702591\n",
      "DC0:base entropy=-1.3855941325211383, DC0:basete troenpy=-0.2875324883725894\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36438\n",
      "fle max=4.798391994889439\n",
      "normalizing bias features ...\n",
      "number of ebias features =36438\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36438,)\n",
      "len of dfs: 36438\n",
      "the vocabulary size =:36438\n",
      "time passed:17164.44902896881\n",
      "DC0:base entropy=-1.3855941325211383, DC0:basete troenpy=-0.2875324883725894\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =36438\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 10 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36130,)\n",
      "len of dfs: 36130\n",
      "the vocabulary size =:36130\n",
      "time passed:17228.98379087448\n",
      "DC0:base entropy=-1.38566271672531, DC0:basete troenpy=-0.287479074905507\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36130\n",
      "fle max=2.7669507434662117\n",
      "normalizing bias features ...\n",
      "number of ebias features =36130\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36130,)\n",
      "len of dfs: 36130\n",
      "the vocabulary size =:36130\n",
      "time passed:17291.595590114594\n",
      "DC0:base entropy=-1.38566271672531, DC0:basete troenpy=-0.287479074905507\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36130\n",
      "fle max=10000.721728426963\n",
      "normalizing bias features ...\n",
      "number of ebias features =36130\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36130,)\n",
      "len of dfs: 36130\n",
      "the vocabulary size =:36130\n",
      "time passed:17353.519603013992\n",
      "DC0:base entropy=-1.38566271672531, DC0:basete troenpy=-0.287479074905507\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36130\n",
      "fle max=88.58259395213209\n",
      "normalizing bias features ...\n",
      "number of ebias features =36130\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36130,)\n",
      "len of dfs: 36130\n",
      "the vocabulary size =:36130\n",
      "time passed:17415.348326921463\n",
      "DC0:base entropy=-1.38566271672531, DC0:basete troenpy=-0.287479074905507\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36130\n",
      "fle max=4.820078568058763\n",
      "normalizing bias features ...\n",
      "number of ebias features =36130\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36130,)\n",
      "len of dfs: 36130\n",
      "the vocabulary size =:36130\n",
      "time passed:17478.347128868103\n",
      "DC0:base entropy=-1.38566271672531, DC0:basete troenpy=-0.287479074905507\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =36130\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 11 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35956,)\n",
      "len of dfs: 35956\n",
      "the vocabulary size =:35956\n",
      "time passed:17541.368954896927\n",
      "DC0:base entropy=-1.3856067781293098, DC0:basete troenpy=-0.287522595528759\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =35956\n",
      "fle max=2.766686928151678\n",
      "normalizing bias features ...\n",
      "number of ebias features =35956\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35956,)\n",
      "len of dfs: 35956\n",
      "the vocabulary size =:35956\n",
      "time passed:17603.701627969742\n",
      "DC0:base entropy=-1.3856067781293098, DC0:basete troenpy=-0.287522595528759\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35956\n",
      "fle max=10000.721757566102\n",
      "normalizing bias features ...\n",
      "number of ebias features =35956\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35956,)\n",
      "len of dfs: 35956\n",
      "the vocabulary size =:35956\n",
      "time passed:17665.44668197632\n",
      "DC0:base entropy=-1.3856067781293098, DC0:basete troenpy=-0.287522595528759\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35956\n",
      "fle max=88.58255416390018\n",
      "normalizing bias features ...\n",
      "number of ebias features =35956\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35956,)\n",
      "len of dfs: 35956\n",
      "the vocabulary size =:35956\n",
      "time passed:17727.36584997177\n",
      "DC0:base entropy=-1.3856067781293098, DC0:basete troenpy=-0.287522595528759\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35956\n",
      "fle max=4.7761589652608984\n",
      "normalizing bias features ...\n",
      "number of ebias features =35956\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35956,)\n",
      "len of dfs: 35956\n",
      "the vocabulary size =:35956\n",
      "time passed:17789.64496588707\n",
      "DC0:base entropy=-1.3856067781293098, DC0:basete troenpy=-0.287522595528759\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =35956\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 12 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36325,)\n",
      "len of dfs: 36325\n",
      "the vocabulary size =:36325\n",
      "time passed:17851.89803504944\n",
      "DC0:base entropy=-1.3856136131480752, DC0:basete troenpy=-0.28751751103233814\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36325\n",
      "fle max=2.766824573463251\n",
      "normalizing bias features ...\n",
      "number of ebias features =36325\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36325,)\n",
      "len of dfs: 36325\n",
      "the vocabulary size =:36325\n",
      "time passed:17914.087069034576\n",
      "DC0:base entropy=-1.3856136131480752, DC0:basete troenpy=-0.28751751103233814\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36325\n",
      "fle max=10000.721754005526\n",
      "normalizing bias features ...\n",
      "number of ebias features =36325\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36325,)\n",
      "len of dfs: 36325\n",
      "the vocabulary size =:36325\n",
      "time passed:17976.168532133102\n",
      "DC0:base entropy=-1.3856136131480752, DC0:basete troenpy=-0.28751751103233814\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36325\n",
      "fle max=141.3184865374069\n",
      "normalizing bias features ...\n",
      "number of ebias features =36325\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36325,)\n",
      "len of dfs: 36325\n",
      "the vocabulary size =:36325\n",
      "time passed:18038.470799922943\n",
      "DC0:base entropy=-1.3856136131480752, DC0:basete troenpy=-0.28751751103233814\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36325\n",
      "fle max=4.7873271813626035\n",
      "normalizing bias features ...\n",
      "number of ebias features =36325\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36325,)\n",
      "len of dfs: 36325\n",
      "the vocabulary size =:36325\n",
      "time passed:18101.037260055542\n",
      "DC0:base entropy=-1.3856136131480752, DC0:basete troenpy=-0.28751751103233814\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =36325\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 13 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36306,)\n",
      "len of dfs: 36306\n",
      "the vocabulary size =:36306\n",
      "time passed:18165.499895095825\n",
      "DC0:base entropy=-1.3855539355008066, DC0:basete troenpy=-0.2875628678365961\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36306\n",
      "fle max=2.7676198048884104\n",
      "normalizing bias features ...\n",
      "number of ebias features =36306\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36306,)\n",
      "len of dfs: 36306\n",
      "the vocabulary size =:36306\n",
      "time passed:18227.962817192078\n",
      "DC0:base entropy=-1.3855539355008066, DC0:basete troenpy=-0.2875628678365961\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36306\n",
      "fle max=10000.721785094673\n",
      "normalizing bias features ...\n",
      "number of ebias features =36306\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36306,)\n",
      "len of dfs: 36306\n",
      "the vocabulary size =:36306\n",
      "time passed:18290.55845117569\n",
      "DC0:base entropy=-1.3855539355008066, DC0:basete troenpy=-0.2875628678365961\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36306\n",
      "fle max=91.57013559830267\n",
      "normalizing bias features ...\n",
      "number of ebias features =36306\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36306,)\n",
      "len of dfs: 36306\n",
      "the vocabulary size =:36306\n",
      "time passed:18352.518977165222\n",
      "DC0:base entropy=-1.3855539355008066, DC0:basete troenpy=-0.2875628678365961\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36306\n",
      "fle max=4.851911059304432\n",
      "normalizing bias features ...\n",
      "number of ebias features =36306\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36306,)\n",
      "len of dfs: 36306\n",
      "the vocabulary size =:36306\n",
      "time passed:18414.959791898727\n",
      "DC0:base entropy=-1.3855539355008066, DC0:basete troenpy=-0.2875628678365961\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =36306\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 14 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36091,)\n",
      "len of dfs: 36091\n",
      "the vocabulary size =:36091\n",
      "time passed:18479.46199798584\n",
      "DC0:base entropy=-1.3856606723297435, DC0:basete troenpy=-0.28748067219209755\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36091\n",
      "fle max=2.7667320553371564\n",
      "normalizing bias features ...\n",
      "number of ebias features =36091\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36091,)\n",
      "len of dfs: 36091\n",
      "the vocabulary size =:36091\n",
      "time passed:18541.92139196396\n",
      "DC0:base entropy=-1.3856606723297435, DC0:basete troenpy=-0.28748067219209755\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36091\n",
      "fle max=10000.721729491874\n",
      "normalizing bias features ...\n",
      "number of ebias features =36091\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36091,)\n",
      "len of dfs: 36091\n",
      "the vocabulary size =:36091\n",
      "time passed:18604.048313856125\n",
      "DC0:base entropy=-1.3856606723297435, DC0:basete troenpy=-0.28748067219209755\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36091\n",
      "fle max=148.27664686897901\n",
      "normalizing bias features ...\n",
      "number of ebias features =36091\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36091,)\n",
      "len of dfs: 36091\n",
      "the vocabulary size =:36091\n",
      "time passed:18665.61861205101\n",
      "DC0:base entropy=-1.3856606723297435, DC0:basete troenpy=-0.28748067219209755\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36091\n",
      "fle max=4.787290342522363\n",
      "normalizing bias features ...\n",
      "number of ebias features =36091\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36091,)\n",
      "len of dfs: 36091\n",
      "the vocabulary size =:36091\n",
      "time passed:18727.82690501213\n",
      "DC0:base entropy=-1.3856606723297435, DC0:basete troenpy=-0.28748067219209755\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =36091\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 15 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36165,)\n",
      "len of dfs: 36165\n",
      "the vocabulary size =:36165\n",
      "time passed:18790.700684070587\n",
      "DC0:base entropy=-1.3855973936959523, DC0:basete troenpy=-0.28752976661735086\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36165\n",
      "fle max=2.766240004366896\n",
      "normalizing bias features ...\n",
      "number of ebias features =36165\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36165,)\n",
      "len of dfs: 36165\n",
      "the vocabulary size =:36165\n",
      "time passed:18852.762211084366\n",
      "DC0:base entropy=-1.3855973936959523, DC0:basete troenpy=-0.28752976661735086\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36165\n",
      "fle max=10000.721762454805\n",
      "normalizing bias features ...\n",
      "number of ebias features =36165\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36165,)\n",
      "len of dfs: 36165\n",
      "the vocabulary size =:36165\n",
      "time passed:18914.467048168182\n",
      "DC0:base entropy=-1.3855973936959523, DC0:basete troenpy=-0.28752976661735086\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36165\n",
      "fle max=96.54882944398724\n",
      "normalizing bias features ...\n",
      "number of ebias features =36165\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36165,)\n",
      "len of dfs: 36165\n",
      "the vocabulary size =:36165\n",
      "time passed:18975.883374929428\n",
      "DC0:base entropy=-1.3855973936959523, DC0:basete troenpy=-0.28752976661735086\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36165\n",
      "fle max=4.798389273134201\n",
      "normalizing bias features ...\n",
      "number of ebias features =36165\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36165,)\n",
      "len of dfs: 36165\n",
      "the vocabulary size =:36165\n",
      "time passed:19037.498442173004\n",
      "DC0:base entropy=-1.3855973936959523, DC0:basete troenpy=-0.28752976661735086\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =36165\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 16 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36154,)\n",
      "len of dfs: 36154\n",
      "the vocabulary size =:36154\n",
      "time passed:19102.61734008789\n",
      "DC0:base entropy=-1.385617205441688, DC0:basete troenpy=-0.28751457664134217\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36154\n",
      "fle max=2.7675374309172733\n",
      "normalizing bias features ...\n",
      "number of ebias features =36154\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36154,)\n",
      "len of dfs: 36154\n",
      "the vocabulary size =:36154\n",
      "time passed:19165.48074412346\n",
      "DC0:base entropy=-1.385617205441688, DC0:basete troenpy=-0.28751457664134217\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36154\n",
      "fle max=10000.7217521342\n",
      "normalizing bias features ...\n",
      "number of ebias features =36154\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36154,)\n",
      "len of dfs: 36154\n",
      "the vocabulary size =:36154\n",
      "time passed:19227.29717683792\n",
      "DC0:base entropy=-1.385617205441688, DC0:basete troenpy=-0.28751457664134217\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36154\n",
      "fle max=107.49885380485729\n",
      "normalizing bias features ...\n",
      "number of ebias features =36154\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36154,)\n",
      "len of dfs: 36154\n",
      "the vocabulary size =:36154\n",
      "time passed:19291.134659290314\n",
      "DC0:base entropy=-1.385617205441688, DC0:basete troenpy=-0.28751457664134217\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =36154\n",
      "fle max=4.882634426775932\n",
      "normalizing bias features ...\n",
      "number of ebias features =36154\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36154,)\n",
      "len of dfs: 36154\n",
      "the vocabulary size =:36154\n",
      "time passed:19354.94566512108\n",
      "DC0:base entropy=-1.385617205441688, DC0:basete troenpy=-0.28751457664134217\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =36154\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 17 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36145,)\n",
      "len of dfs: 36145\n",
      "the vocabulary size =:36145\n",
      "time passed:19421.66480588913\n",
      "DC0:base entropy=-1.38558877782073, DC0:basete troenpy=-0.28753643984462346\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36145\n",
      "fle max=2.766893440349154\n",
      "normalizing bias features ...\n",
      "number of ebias features =36145\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36145,)\n",
      "len of dfs: 36145\n",
      "the vocabulary size =:36145\n",
      "time passed:19484.934511899948\n",
      "DC0:base entropy=-1.38558877782073, DC0:basete troenpy=-0.28753643984462346\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36145\n",
      "fle max=10000.721766943196\n",
      "normalizing bias features ...\n",
      "number of ebias features =36145\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36145,)\n",
      "len of dfs: 36145\n",
      "the vocabulary size =:36145\n",
      "time passed:19549.60262799263\n",
      "DC0:base entropy=-1.38558877782073, DC0:basete troenpy=-0.28753643984462346\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36145\n",
      "fle max=82.60635063522025\n",
      "normalizing bias features ...\n",
      "number of ebias features =36145\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36145,)\n",
      "len of dfs: 36145\n",
      "the vocabulary size =:36145\n",
      "time passed:19612.14284801483\n",
      "DC0:base entropy=-1.38558877782073, DC0:basete troenpy=-0.28753643984462346\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36145\n",
      "fle max=4.776172809576763\n",
      "normalizing bias features ...\n",
      "number of ebias features =36145\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36145,)\n",
      "len of dfs: 36145\n",
      "the vocabulary size =:36145\n",
      "time passed:19675.545387029648\n",
      "DC0:base entropy=-1.38558877782073, DC0:basete troenpy=-0.28753643984462346\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =36145\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 18 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36020,)\n",
      "len of dfs: 36020\n",
      "the vocabulary size =:36020\n",
      "time passed:19739.35575723648\n",
      "DC0:base entropy=-1.385573052547318, DC0:basete troenpy=-0.2875487804142933\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36020\n",
      "fle max=2.766127863200494\n",
      "normalizing bias features ...\n",
      "number of ebias features =36020\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36020,)\n",
      "len of dfs: 36020\n",
      "the vocabulary size =:36020\n",
      "time passed:19801.894566059113\n",
      "DC0:base entropy=-1.385573052547318, DC0:basete troenpy=-0.2875487804142933\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36020\n",
      "fle max=10000.721775135331\n",
      "normalizing bias features ...\n",
      "number of ebias features =36020\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36020,)\n",
      "len of dfs: 36020\n",
      "the vocabulary size =:36020\n",
      "time passed:19863.9720621109\n",
      "DC0:base entropy=-1.385573052547318, DC0:basete troenpy=-0.2875487804142933\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36020\n",
      "fle max=85.59459440339978\n",
      "normalizing bias features ...\n",
      "number of ebias features =36020\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36020,)\n",
      "len of dfs: 36020\n",
      "the vocabulary size =:36020\n",
      "time passed:19926.262813091278\n",
      "DC0:base entropy=-1.385573052547318, DC0:basete troenpy=-0.2875487804142933\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36020\n",
      "fle max=4.798408286931143\n",
      "normalizing bias features ...\n",
      "number of ebias features =36020\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36020,)\n",
      "len of dfs: 36020\n",
      "the vocabulary size =:36020\n",
      "time passed:19988.311012029648\n",
      "DC0:base entropy=-1.385573052547318, DC0:basete troenpy=-0.2875487804142933\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =36020\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 19 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35699,)\n",
      "len of dfs: 35699\n",
      "the vocabulary size =:35699\n",
      "time passed:20051.69635105133\n",
      "DC0:base entropy=-1.3856282914224112, DC0:basete troenpy=-0.28750590035177503\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35699\n",
      "fle max=2.767041369530384\n",
      "normalizing bias features ...\n",
      "number of ebias features =35699\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35699,)\n",
      "len of dfs: 35699\n",
      "the vocabulary size =:35699\n",
      "time passed:20114.690233945847\n",
      "DC0:base entropy=-1.3856282914224112, DC0:basete troenpy=-0.28750590035177503\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35699\n",
      "fle max=10000.72174635927\n",
      "normalizing bias features ...\n",
      "number of ebias features =35699\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35699,)\n",
      "len of dfs: 35699\n",
      "the vocabulary size =:35699\n",
      "time passed:20176.40035009384\n",
      "DC0:base entropy=-1.3856282914224112, DC0:basete troenpy=-0.28750590035177503\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =35699\n",
      "fle max=91.57018785324084\n",
      "normalizing bias features ...\n",
      "number of ebias features =35699\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35699,)\n",
      "len of dfs: 35699\n",
      "the vocabulary size =:35699\n",
      "time passed:20239.403474092484\n",
      "DC0:base entropy=-1.3856282914224112, DC0:basete troenpy=-0.28750590035177503\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35699\n",
      "fle max=4.830800682621779\n",
      "normalizing bias features ...\n",
      "number of ebias features =35699\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35699,)\n",
      "len of dfs: 35699\n",
      "the vocabulary size =:35699\n",
      "time passed:20304.093945980072\n",
      "DC0:base entropy=-1.3856282914224112, DC0:basete troenpy=-0.28750590035177503\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =35699\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 20 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36220,)\n",
      "len of dfs: 36220\n",
      "the vocabulary size =:36220\n",
      "time passed:20366.57988524437\n",
      "DC0:base entropy=-1.3856319102586212, DC0:basete troenpy=-0.28750297724579615\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36220\n",
      "fle max=2.7665081503766333\n",
      "normalizing bias features ...\n",
      "number of ebias features =36220\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36220,)\n",
      "len of dfs: 36220\n",
      "the vocabulary size =:36220\n",
      "time passed:20430.526883125305\n",
      "DC0:base entropy=-1.3856319102586212, DC0:basete troenpy=-0.28750297724579615\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36220\n",
      "fle max=10000.721744474158\n",
      "normalizing bias features ...\n",
      "number of ebias features =36220\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36220,)\n",
      "len of dfs: 36220\n",
      "the vocabulary size =:36220\n",
      "time passed:20493.921710968018\n",
      "DC0:base entropy=-1.3856319102586212, DC0:basete troenpy=-0.28750297724579615\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36220\n",
      "fle max=129.3865513684196\n",
      "normalizing bias features ...\n",
      "number of ebias features =36220\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36220,)\n",
      "len of dfs: 36220\n",
      "the vocabulary size =:36220\n",
      "time passed:20559.388241052628\n",
      "DC0:base entropy=-1.3856319102586212, DC0:basete troenpy=-0.28750297724579615\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36220\n",
      "fle max=4.798362483762646\n",
      "normalizing bias features ...\n",
      "number of ebias features =36220\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36220,)\n",
      "len of dfs: 36220\n",
      "the vocabulary size =:36220\n",
      "time passed:20622.623026132584\n",
      "DC0:base entropy=-1.3856319102586212, DC0:basete troenpy=-0.28750297724579615\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =36220\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 21 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36148,)\n",
      "len of dfs: 36148\n",
      "the vocabulary size =:36148\n",
      "time passed:20687.00993204117\n",
      "DC0:base entropy=-1.3856546182534315, DC0:basete troenpy=-0.28748538055750567\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36148\n",
      "fle max=2.7654683672104268\n",
      "normalizing bias features ...\n",
      "number of ebias features =36148\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36148,)\n",
      "len of dfs: 36148\n",
      "the vocabulary size =:36148\n",
      "time passed:20748.758614063263\n",
      "DC0:base entropy=-1.3856546182534315, DC0:basete troenpy=-0.28748538055750567\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36148\n",
      "fle max=10000.721732645416\n",
      "normalizing bias features ...\n",
      "number of ebias features =36148\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36148,)\n",
      "len of dfs: 36148\n",
      "the vocabulary size =:36148\n",
      "time passed:20811.059719085693\n",
      "DC0:base entropy=-1.3856546182534315, DC0:basete troenpy=-0.28748538055750567\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36148\n",
      "fle max=144.30076543087188\n",
      "normalizing bias features ...\n",
      "number of ebias features =36148\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36148,)\n",
      "len of dfs: 36148\n",
      "the vocabulary size =:36148\n",
      "time passed:20875.07872915268\n",
      "DC0:base entropy=-1.3856546182534315, DC0:basete troenpy=-0.28748538055750567\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36148\n",
      "fle max=4.809273957606546\n",
      "normalizing bias features ...\n",
      "number of ebias features =36148\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36148,)\n",
      "len of dfs: 36148\n",
      "the vocabulary size =:36148\n",
      "time passed:20937.307868003845\n",
      "DC0:base entropy=-1.3856546182534315, DC0:basete troenpy=-0.28748538055750567\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =36148\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 22 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36214,)\n",
      "len of dfs: 36214\n",
      "the vocabulary size =:36214\n",
      "time passed:21001.81188106537\n",
      "DC0:base entropy=-1.3856293089921263, DC0:basete troenpy=-0.287505157797218\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36214\n",
      "fle max=2.7666310028826127\n",
      "normalizing bias features ...\n",
      "number of ebias features =36214\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36214,)\n",
      "len of dfs: 36214\n",
      "the vocabulary size =:36214\n",
      "time passed:21063.867964029312\n",
      "DC0:base entropy=-1.3856293089921263, DC0:basete troenpy=-0.287505157797218\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =36214\n",
      "fle max=10000.7217458292\n",
      "normalizing bias features ...\n",
      "number of ebias features =36214\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36214,)\n",
      "len of dfs: 36214\n",
      "the vocabulary size =:36214\n",
      "time passed:21126.867371082306\n",
      "DC0:base entropy=-1.3856293089921263, DC0:basete troenpy=-0.287505157797218\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36214\n",
      "fle max=139.33016816371705\n",
      "normalizing bias features ...\n",
      "number of ebias features =36214\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36214,)\n",
      "len of dfs: 36214\n",
      "the vocabulary size =:36214\n",
      "time passed:21189.87815308571\n",
      "DC0:base entropy=-1.3856293089921263, DC0:basete troenpy=-0.287505157797218\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36214\n",
      "fle max=4.776141527529358\n",
      "normalizing bias features ...\n",
      "number of ebias features =36214\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36214,)\n",
      "len of dfs: 36214\n",
      "the vocabulary size =:36214\n",
      "time passed:21253.24848008156\n",
      "DC0:base entropy=-1.3856293089921263, DC0:basete troenpy=-0.287505157797218\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =36214\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 23 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35965,)\n",
      "len of dfs: 35965\n",
      "the vocabulary size =:35965\n",
      "time passed:21318.85622215271\n",
      "DC0:base entropy=-1.3856434967916034, DC0:basete troenpy=-0.28749404215560576\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35965\n",
      "fle max=2.7651912442794164\n",
      "normalizing bias features ...\n",
      "number of ebias features =35965\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35965,)\n",
      "len of dfs: 35965\n",
      "the vocabulary size =:35965\n",
      "time passed:21380.807928085327\n",
      "DC0:base entropy=-1.3856434967916034, DC0:basete troenpy=-0.28749404215560576\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35965\n",
      "fle max=10000.72173843861\n",
      "normalizing bias features ...\n",
      "number of ebias features =35965\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35965,)\n",
      "len of dfs: 35965\n",
      "the vocabulary size =:35965\n",
      "time passed:21443.146762132645\n",
      "DC0:base entropy=-1.3856434967916034, DC0:basete troenpy=-0.28749404215560576\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35965\n",
      "fle max=151.25820202516613\n",
      "normalizing bias features ...\n",
      "number of ebias features =35965\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35965,)\n",
      "len of dfs: 35965\n",
      "the vocabulary size =:35965\n",
      "time passed:21504.926938056946\n",
      "DC0:base entropy=-1.3856434967916034, DC0:basete troenpy=-0.28749404215560576\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35965\n",
      "fle max=4.851842233623442\n",
      "normalizing bias features ...\n",
      "number of ebias features =35965\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35965,)\n",
      "len of dfs: 35965\n",
      "the vocabulary size =:35965\n",
      "time passed:21566.84852695465\n",
      "DC0:base entropy=-1.3856434967916034, DC0:basete troenpy=-0.28749404215560576\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =35965\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 24 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36241,)\n",
      "len of dfs: 36241\n",
      "the vocabulary size =:36241\n",
      "time passed:21633.47163105011\n",
      "DC0:base entropy=-1.385623707099882, DC0:basete troenpy=-0.28750958775353097\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36241\n",
      "fle max=2.766100034249959\n",
      "normalizing bias features ...\n",
      "number of ebias features =36241\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36241,)\n",
      "len of dfs: 36241\n",
      "the vocabulary size =:36241\n",
      "time passed:21695.67004299164\n",
      "DC0:base entropy=-1.385623707099882, DC0:basete troenpy=-0.28750958775353097\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36241\n",
      "fle max=10000.721748747334\n",
      "normalizing bias features ...\n",
      "number of ebias features =36241\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36241,)\n",
      "len of dfs: 36241\n",
      "the vocabulary size =:36241\n",
      "time passed:21759.468411922455\n",
      "DC0:base entropy=-1.385623707099882, DC0:basete troenpy=-0.28750958775353097\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36241\n",
      "fle max=69.65353279783582\n",
      "normalizing bias features ...\n",
      "number of ebias features =36241\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36241,)\n",
      "len of dfs: 36241\n",
      "the vocabulary size =:36241\n",
      "time passed:21822.831321001053\n",
      "DC0:base entropy=-1.385623707099882, DC0:basete troenpy=-0.28750958775353097\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36241\n",
      "fle max=4.787319258083796\n",
      "normalizing bias features ...\n",
      "number of ebias features =36241\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36241,)\n",
      "len of dfs: 36241\n",
      "the vocabulary size =:36241\n",
      "time passed:21884.708636045456\n",
      "DC0:base entropy=-1.385623707099882, DC0:basete troenpy=-0.28750958775353097\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =36241\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 25 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35913,)\n",
      "len of dfs: 35913\n",
      "the vocabulary size =:35913\n",
      "time passed:21949.212173223495\n",
      "DC0:base entropy=-1.3856659404990124, DC0:basete troenpy=-0.28747656943541916\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =35913\n",
      "fle max=2.766144354915722\n",
      "normalizing bias features ...\n",
      "number of ebias features =35913\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35913,)\n",
      "len of dfs: 35913\n",
      "the vocabulary size =:35913\n",
      "time passed:22015.077057123184\n",
      "DC0:base entropy=-1.3856659404990124, DC0:basete troenpy=-0.28747656943541916\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35913\n",
      "fle max=10000.721726747728\n",
      "normalizing bias features ...\n",
      "number of ebias features =35913\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35913,)\n",
      "len of dfs: 35913\n",
      "the vocabulary size =:35913\n",
      "time passed:22078.95503592491\n",
      "DC0:base entropy=-1.3856659404990124, DC0:basete troenpy=-0.28747656943541916\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35913\n",
      "fle max=83.60252599854114\n",
      "normalizing bias features ...\n",
      "number of ebias features =35913\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35913,)\n",
      "len of dfs: 35913\n",
      "the vocabulary size =:35913\n",
      "time passed:22142.44183897972\n",
      "DC0:base entropy=-1.3856659404990124, DC0:basete troenpy=-0.28747656943541916\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35913\n",
      "fle max=4.80926514648446\n",
      "normalizing bias features ...\n",
      "number of ebias features =35913\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35913,)\n",
      "len of dfs: 35913\n",
      "the vocabulary size =:35913\n",
      "time passed:22204.953716993332\n",
      "DC0:base entropy=-1.3856659404990124, DC0:basete troenpy=-0.28747656943541916\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =35913\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 26 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36125,)\n",
      "len of dfs: 36125\n",
      "the vocabulary size =:36125\n",
      "time passed:22269.82717204094\n",
      "DC0:base entropy=-1.3855673288845975, DC0:basete troenpy=-0.2875533939360466\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36125\n",
      "fle max=2.7679911916541977\n",
      "normalizing bias features ...\n",
      "number of ebias features =36125\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36125,)\n",
      "len of dfs: 36125\n",
      "the vocabulary size =:36125\n",
      "time passed:22334.447287082672\n",
      "DC0:base entropy=-1.3855673288845975, DC0:basete troenpy=-0.2875533939360466\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36125\n",
      "fle max=10000.721778117138\n",
      "normalizing bias features ...\n",
      "number of ebias features =36125\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36125,)\n",
      "len of dfs: 36125\n",
      "the vocabulary size =:36125\n",
      "time passed:22398.375360012054\n",
      "DC0:base entropy=-1.3855673288845975, DC0:basete troenpy=-0.2875533939360466\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36125\n",
      "fle max=88.58252602539616\n",
      "normalizing bias features ...\n",
      "number of ebias features =36125\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36125,)\n",
      "len of dfs: 36125\n",
      "the vocabulary size =:36125\n",
      "time passed:22466.41593003273\n",
      "DC0:base entropy=-1.3855673288845975, DC0:basete troenpy=-0.2875533939360466\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36125\n",
      "fle max=4.798412900452896\n",
      "normalizing bias features ...\n",
      "number of ebias features =36125\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36125,)\n",
      "len of dfs: 36125\n",
      "the vocabulary size =:36125\n",
      "time passed:22529.045496225357\n",
      "DC0:base entropy=-1.3855673288845975, DC0:basete troenpy=-0.2875533939360466\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =36125\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 27 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36190,)\n",
      "len of dfs: 36190\n",
      "the vocabulary size =:36190\n",
      "time passed:22591.78958916664\n",
      "DC0:base entropy=-1.385651683992352, DC0:basete troenpy=-0.2874876567962397\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36190\n",
      "fle max=2.76714545698574\n",
      "normalizing bias features ...\n",
      "number of ebias features =36190\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36190,)\n",
      "len of dfs: 36190\n",
      "the vocabulary size =:36190\n",
      "time passed:22653.954668045044\n",
      "DC0:base entropy=-1.385651683992352, DC0:basete troenpy=-0.2874876567962397\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36190\n",
      "fle max=10000.72173417387\n",
      "normalizing bias features ...\n",
      "number of ebias features =36190\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36190,)\n",
      "len of dfs: 36190\n",
      "the vocabulary size =:36190\n",
      "time passed:22715.5906791687\n",
      "DC0:base entropy=-1.385651683992352, DC0:basete troenpy=-0.2874876567962397\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36190\n",
      "fle max=91.57020452361287\n",
      "normalizing bias features ...\n",
      "number of ebias features =36190\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36190,)\n",
      "len of dfs: 36190\n",
      "the vocabulary size =:36190\n",
      "time passed:22777.24506497383\n",
      "DC0:base entropy=-1.385651683992352, DC0:basete troenpy=-0.2874876567962397\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36190\n",
      "fle max=4.7418349530497474\n",
      "normalizing bias features ...\n",
      "number of ebias features =36190\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36190,)\n",
      "len of dfs: 36190\n",
      "the vocabulary size =:36190\n",
      "time passed:22839.06609392166\n",
      "DC0:base entropy=-1.385651683992352, DC0:basete troenpy=-0.2874876567962397\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =36190\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 28 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36074,)\n",
      "len of dfs: 36074\n",
      "the vocabulary size =:36074\n",
      "time passed:22902.79064297676\n",
      "DC0:base entropy=-1.3856374203706083, DC0:basete troenpy=-0.28749874142487775\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36074\n",
      "fle max=2.7664897284093075\n",
      "normalizing bias features ...\n",
      "number of ebias features =36074\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36074,)\n",
      "len of dfs: 36074\n",
      "the vocabulary size =:36074\n",
      "time passed:22964.538915872574\n",
      "DC0:base entropy=-1.3856374203706083, DC0:basete troenpy=-0.28749874142487775\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36074\n",
      "fle max=10000.72174160387\n",
      "normalizing bias features ...\n",
      "number of ebias features =36074\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36074,)\n",
      "len of dfs: 36074\n",
      "the vocabulary size =:36074\n",
      "time passed:23026.561553955078\n",
      "DC0:base entropy=-1.3856374203706083, DC0:basete troenpy=-0.28749874142487775\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36074\n",
      "fle max=82.60638512971781\n",
      "normalizing bias features ...\n",
      "number of ebias features =36074\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36074,)\n",
      "len of dfs: 36074\n",
      "the vocabulary size =:36074\n",
      "time passed:23087.84723520279\n",
      "DC0:base entropy=-1.3856374203706083, DC0:basete troenpy=-0.28749874142487775\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36074\n",
      "fle max=4.8724662200954505\n",
      "normalizing bias features ...\n",
      "number of ebias features =36074\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36074,)\n",
      "len of dfs: 36074\n",
      "the vocabulary size =:36074\n",
      "time passed:23148.987226963043\n",
      "DC0:base entropy=-1.3856374203706083, DC0:basete troenpy=-0.28749874142487775\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =36074\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 29 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35799,)\n",
      "len of dfs: 35799\n",
      "the vocabulary size =:35799\n",
      "time passed:23212.015450000763\n",
      "DC0:base entropy=-1.3855502785560065, DC0:basete troenpy=-0.28756658429160276\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35799\n",
      "fle max=2.766511271180846\n",
      "normalizing bias features ...\n",
      "number of ebias features =35799\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35799,)\n",
      "len of dfs: 35799\n",
      "the vocabulary size =:35799\n",
      "time passed:23274.193164110184\n",
      "DC0:base entropy=-1.3855502785560065, DC0:basete troenpy=-0.28756658429160276\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35799\n",
      "fle max=10000.72178699985\n",
      "normalizing bias features ...\n",
      "number of ebias features =35799\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35799,)\n",
      "len of dfs: 35799\n",
      "the vocabulary size =:35799\n",
      "time passed:23334.95333313942\n",
      "DC0:base entropy=-1.3855502785560065, DC0:basete troenpy=-0.28756658429160276\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35799\n",
      "fle max=80.61397480394315\n",
      "normalizing bias features ...\n",
      "number of ebias features =35799\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35799,)\n",
      "len of dfs: 35799\n",
      "the vocabulary size =:35799\n",
      "time passed:23395.38739013672\n",
      "DC0:base entropy=-1.3855502785560065, DC0:basete troenpy=-0.28756658429160276\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35799\n",
      "fle max=4.809355161340643\n",
      "normalizing bias features ...\n",
      "number of ebias features =35799\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35799,)\n",
      "len of dfs: 35799\n",
      "the vocabulary size =:35799\n",
      "time passed:23455.65938591957\n",
      "DC0:base entropy=-1.3855502785560065, DC0:basete troenpy=-0.28756658429160276\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =35799\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 30 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36371,)\n",
      "len of dfs: 36371\n",
      "the vocabulary size =:36371\n",
      "time passed:23517.980152845383\n",
      "DC0:base entropy=-1.3855896926084024, DC0:basete troenpy=-0.28753577168180966\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36371\n",
      "fle max=2.766555923615067\n",
      "normalizing bias features ...\n",
      "number of ebias features =36371\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36371,)\n",
      "len of dfs: 36371\n",
      "the vocabulary size =:36371\n",
      "time passed:23580.150601148605\n",
      "DC0:base entropy=-1.3855896926084024, DC0:basete troenpy=-0.28753577168180966\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36371\n",
      "fle max=10000.72176646664\n",
      "normalizing bias features ...\n",
      "number of ebias features =36371\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36371,)\n",
      "len of dfs: 36371\n",
      "the vocabulary size =:36371\n",
      "time passed:23642.098097085953\n",
      "DC0:base entropy=-1.3855896926084024, DC0:basete troenpy=-0.28753577168180966\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36371\n",
      "fle max=84.59855683677193\n",
      "normalizing bias features ...\n",
      "number of ebias features =36371\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36371,)\n",
      "len of dfs: 36371\n",
      "the vocabulary size =:36371\n",
      "time passed:23703.509604930878\n",
      "DC0:base entropy=-1.3855896926084024, DC0:basete troenpy=-0.28753577168180966\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =36371\n",
      "fle max=5.277968358460546\n",
      "normalizing bias features ...\n",
      "number of ebias features =36371\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36371,)\n",
      "len of dfs: 36371\n",
      "the vocabulary size =:36371\n",
      "time passed:23764.855802059174\n",
      "DC0:base entropy=-1.3855896926084024, DC0:basete troenpy=-0.28753577168180966\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =36371\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 31 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35921,)\n",
      "len of dfs: 35921\n",
      "the vocabulary size =:35921\n",
      "time passed:23828.461749076843\n",
      "DC0:base entropy=-1.3856341141561583, DC0:basete troenpy=-0.287501329089891\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35921\n",
      "fle max=2.767201765894854\n",
      "normalizing bias features ...\n",
      "number of ebias features =35921\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35921,)\n",
      "len of dfs: 35921\n",
      "the vocabulary size =:35921\n",
      "time passed:23889.59832715988\n",
      "DC0:base entropy=-1.3856341141561583, DC0:basete troenpy=-0.287501329089891\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35921\n",
      "fle max=10000.721743326118\n",
      "normalizing bias features ...\n",
      "number of ebias features =35921\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35921,)\n",
      "len of dfs: 35921\n",
      "the vocabulary size =:35921\n",
      "time passed:23951.30103111267\n",
      "DC0:base entropy=-1.3856341141561583, DC0:basete troenpy=-0.287501329089891\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35921\n",
      "fle max=90.57435437478213\n",
      "normalizing bias features ...\n",
      "number of ebias features =35921\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35921,)\n",
      "len of dfs: 35921\n",
      "the vocabulary size =:35921\n",
      "time passed:24012.664649009705\n",
      "DC0:base entropy=-1.3856341141561583, DC0:basete troenpy=-0.287501329089891\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35921\n",
      "fle max=4.798360835606741\n",
      "normalizing bias features ...\n",
      "number of ebias features =35921\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35921,)\n",
      "len of dfs: 35921\n",
      "the vocabulary size =:35921\n",
      "time passed:24073.673164129257\n",
      "DC0:base entropy=-1.3856341141561583, DC0:basete troenpy=-0.287501329089891\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =35921\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 32 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35662,)\n",
      "len of dfs: 35662\n",
      "the vocabulary size =:35662\n",
      "time passed:24136.414313077927\n",
      "DC0:base entropy=-1.3855532205529726, DC0:basete troenpy=-0.28756465874227755\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35662\n",
      "fle max=2.7674338068924174\n",
      "normalizing bias features ...\n",
      "number of ebias features =35662\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35662,)\n",
      "len of dfs: 35662\n",
      "the vocabulary size =:35662\n",
      "time passed:24196.261206150055\n",
      "DC0:base entropy=-1.3855532205529726, DC0:basete troenpy=-0.28756465874227755\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35662\n",
      "fle max=10000.721785467142\n",
      "normalizing bias features ...\n",
      "number of ebias features =35662\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35662,)\n",
      "len of dfs: 35662\n",
      "the vocabulary size =:35662\n",
      "time passed:24256.37885904312\n",
      "DC0:base entropy=-1.3855532205529726, DC0:basete troenpy=-0.28756465874227755\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35662\n",
      "fle max=142.3125592648633\n",
      "normalizing bias features ...\n",
      "number of ebias features =35662\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35662,)\n",
      "len of dfs: 35662\n",
      "the vocabulary size =:35662\n",
      "time passed:24316.833827257156\n",
      "DC0:base entropy=-1.3855532205529726, DC0:basete troenpy=-0.28756465874227755\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35662\n",
      "fle max=4.86227563724566\n",
      "normalizing bias features ...\n",
      "number of ebias features =35662\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35662,)\n",
      "len of dfs: 35662\n",
      "the vocabulary size =:35662\n",
      "time passed:24376.998162984848\n",
      "DC0:base entropy=-1.3855532205529726, DC0:basete troenpy=-0.28756465874227755\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =35662\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 33 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35982,)\n",
      "len of dfs: 35982\n",
      "the vocabulary size =:35982\n",
      "time passed:24438.78732395172\n",
      "DC0:base entropy=-1.385608195569617, DC0:basete troenpy=-0.28752132163234134\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35982\n",
      "fle max=2.7669107872453003\n",
      "normalizing bias features ...\n",
      "number of ebias features =35982\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35982,)\n",
      "len of dfs: 35982\n",
      "the vocabulary size =:35982\n",
      "time passed:24499.641292095184\n",
      "DC0:base entropy=-1.385608195569617, DC0:basete troenpy=-0.28752132163234134\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35982\n",
      "fle max=10000.72175682771\n",
      "normalizing bias features ...\n",
      "number of ebias features =35982\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35982,)\n",
      "len of dfs: 35982\n",
      "the vocabulary size =:35982\n",
      "time passed:24560.670649051666\n",
      "DC0:base entropy=-1.385608195569617, DC0:basete troenpy=-0.28752132163234134\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =35982\n",
      "fle max=87.58661204702516\n",
      "normalizing bias features ...\n",
      "number of ebias features =35982\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35982,)\n",
      "len of dfs: 35982\n",
      "the vocabulary size =:35982\n",
      "time passed:24621.639412164688\n",
      "DC0:base entropy=-1.385608195569617, DC0:basete troenpy=-0.28752132163234134\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35982\n",
      "fle max=4.851869513100177\n",
      "normalizing bias features ...\n",
      "number of ebias features =35982\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35982,)\n",
      "len of dfs: 35982\n",
      "the vocabulary size =:35982\n",
      "time passed:24682.814471960068\n",
      "DC0:base entropy=-1.385608195569617, DC0:basete troenpy=-0.28752132163234134\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =35982\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 34 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36096,)\n",
      "len of dfs: 36096\n",
      "the vocabulary size =:36096\n",
      "time passed:24746.071149110794\n",
      "DC0:base entropy=-1.3856581116780675, DC0:basete troenpy=-0.2874826377827898\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36096\n",
      "fle max=2.7670071698268246\n",
      "normalizing bias features ...\n",
      "number of ebias features =36096\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36096,)\n",
      "len of dfs: 36096\n",
      "the vocabulary size =:36096\n",
      "time passed:24807.67267012596\n",
      "DC0:base entropy=-1.3856581116780675, DC0:basete troenpy=-0.2874826377827898\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36096\n",
      "fle max=10000.721730825702\n",
      "normalizing bias features ...\n",
      "number of ebias features =36096\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36096,)\n",
      "len of dfs: 36096\n",
      "the vocabulary size =:36096\n",
      "time passed:24868.87942624092\n",
      "DC0:base entropy=-1.3856581116780675, DC0:basete troenpy=-0.2874826377827898\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36096\n",
      "fle max=84.59860543318923\n",
      "normalizing bias features ...\n",
      "number of ebias features =36096\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36096,)\n",
      "len of dfs: 36096\n",
      "the vocabulary size =:36096\n",
      "time passed:24929.958440065384\n",
      "DC0:base entropy=-1.3856581116780675, DC0:basete troenpy=-0.2874826377827898\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36096\n",
      "fle max=4.764819452260997\n",
      "normalizing bias features ...\n",
      "number of ebias features =36096\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36096,)\n",
      "len of dfs: 36096\n",
      "the vocabulary size =:36096\n",
      "time passed:24991.303452014923\n",
      "DC0:base entropy=-1.3856581116780675, DC0:basete troenpy=-0.2874826377827898\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =36096\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 35 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36288,)\n",
      "len of dfs: 36288\n",
      "the vocabulary size =:36288\n",
      "time passed:25054.040521144867\n",
      "DC0:base entropy=-1.3856587130761895, DC0:basete troenpy=-0.2874821884469711\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36288\n",
      "fle max=2.7664446219848875\n",
      "normalizing bias features ...\n",
      "number of ebias features =36288\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36288,)\n",
      "len of dfs: 36288\n",
      "the vocabulary size =:36288\n",
      "time passed:25115.31054019928\n",
      "DC0:base entropy=-1.3856587130761895, DC0:basete troenpy=-0.2874821884469711\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36288\n",
      "fle max=10000.721730512438\n",
      "normalizing bias features ...\n",
      "number of ebias features =36288\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36288,)\n",
      "len of dfs: 36288\n",
      "the vocabulary size =:36288\n",
      "time passed:25177.01600599289\n",
      "DC0:base entropy=-1.3856587130761895, DC0:basete troenpy=-0.2874821884469711\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36288\n",
      "fle max=86.59066927630295\n",
      "normalizing bias features ...\n",
      "number of ebias features =36288\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36288,)\n",
      "len of dfs: 36288\n",
      "the vocabulary size =:36288\n",
      "time passed:25239.005696058273\n",
      "DC0:base entropy=-1.3856587130761895, DC0:basete troenpy=-0.2874821884469711\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36288\n",
      "fle max=4.8621931669503535\n",
      "normalizing bias features ...\n",
      "number of ebias features =36288\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36288,)\n",
      "len of dfs: 36288\n",
      "the vocabulary size =:36288\n",
      "time passed:25300.49569416046\n",
      "DC0:base entropy=-1.3856587130761895, DC0:basete troenpy=-0.2874821884469711\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =36288\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 36 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36208,)\n",
      "len of dfs: 36208\n",
      "the vocabulary size =:36208\n",
      "time passed:25364.0804271698\n",
      "DC0:base entropy=-1.3855989503986885, DC0:basete troenpy=-0.28752866389086035\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36208\n",
      "fle max=2.7674123532922907\n",
      "normalizing bias features ...\n",
      "number of ebias features =36208\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36208,)\n",
      "len of dfs: 36208\n",
      "the vocabulary size =:36208\n",
      "time passed:25425.770714998245\n",
      "DC0:base entropy=-1.3855989503986885, DC0:basete troenpy=-0.28752866389086035\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =36208\n",
      "fle max=10000.721761643856\n",
      "normalizing bias features ...\n",
      "number of ebias features =36208\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36208,)\n",
      "len of dfs: 36208\n",
      "the vocabulary size =:36208\n",
      "time passed:25487.27320098877\n",
      "DC0:base entropy=-1.3855989503986885, DC0:basete troenpy=-0.28752866389086035\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36208\n",
      "fle max=145.2947438816247\n",
      "normalizing bias features ...\n",
      "number of ebias features =36208\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36208,)\n",
      "len of dfs: 36208\n",
      "the vocabulary size =:36208\n",
      "time passed:25548.64794921875\n",
      "DC0:base entropy=-1.3855989503986885, DC0:basete troenpy=-0.28752866389086035\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36208\n",
      "fle max=4.776165033623\n",
      "normalizing bias features ...\n",
      "number of ebias features =36208\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36208,)\n",
      "len of dfs: 36208\n",
      "the vocabulary size =:36208\n",
      "time passed:25609.779237031937\n",
      "DC0:base entropy=-1.3855989503986885, DC0:basete troenpy=-0.28752866389086035\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =36208\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 37 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36048,)\n",
      "len of dfs: 36048\n",
      "the vocabulary size =:36048\n",
      "time passed:25672.451114177704\n",
      "DC0:base entropy=-1.385601377017288, DC0:basete troenpy=-0.2875269917065486\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36048\n",
      "fle max=2.767154920674546\n",
      "normalizing bias features ...\n",
      "number of ebias features =36048\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36048,)\n",
      "len of dfs: 36048\n",
      "the vocabulary size =:36048\n",
      "time passed:25733.111032247543\n",
      "DC0:base entropy=-1.385601377017288, DC0:basete troenpy=-0.2875269917065486\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36048\n",
      "fle max=10000.721760379736\n",
      "normalizing bias features ...\n",
      "number of ebias features =36048\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36048,)\n",
      "len of dfs: 36048\n",
      "the vocabulary size =:36048\n",
      "time passed:25793.958146095276\n",
      "DC0:base entropy=-1.385601377017288, DC0:basete troenpy=-0.2875269917065486\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36048\n",
      "fle max=82.60635934174604\n",
      "normalizing bias features ...\n",
      "number of ebias features =36048\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36048,)\n",
      "len of dfs: 36048\n",
      "the vocabulary size =:36048\n",
      "time passed:25855.360867261887\n",
      "DC0:base entropy=-1.385601377017288, DC0:basete troenpy=-0.2875269917065486\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36048\n",
      "fle max=4.787336662036814\n",
      "normalizing bias features ...\n",
      "number of ebias features =36048\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36048,)\n",
      "len of dfs: 36048\n",
      "the vocabulary size =:36048\n",
      "time passed:25915.646438121796\n",
      "DC0:base entropy=-1.385601377017288, DC0:basete troenpy=-0.2875269917065486\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =36048\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 38 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35879,)\n",
      "len of dfs: 35879\n",
      "the vocabulary size =:35879\n",
      "time passed:25977.92220902443\n",
      "DC0:base entropy=-1.3856545288939623, DC0:basete troenpy=-0.287485459875361\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35879\n",
      "fle max=2.7670444053309673\n",
      "normalizing bias features ...\n",
      "number of ebias features =35879\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35879,)\n",
      "len of dfs: 35879\n",
      "the vocabulary size =:35879\n",
      "time passed:26038.82221698761\n",
      "DC0:base entropy=-1.3856545288939623, DC0:basete troenpy=-0.287485459875361\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35879\n",
      "fle max=10000.721732691964\n",
      "normalizing bias features ...\n",
      "number of ebias features =35879\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35879,)\n",
      "len of dfs: 35879\n",
      "the vocabulary size =:35879\n",
      "time passed:26099.837475299835\n",
      "DC0:base entropy=-1.3856545288939623, DC0:basete troenpy=-0.287485459875361\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35879\n",
      "fle max=82.60639727760834\n",
      "normalizing bias features ...\n",
      "number of ebias features =35879\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35879,)\n",
      "len of dfs: 35879\n",
      "the vocabulary size =:35879\n",
      "time passed:26160.370700120926\n",
      "DC0:base entropy=-1.3856545288939623, DC0:basete troenpy=-0.287485459875361\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35879\n",
      "fle max=4.764822274353568\n",
      "normalizing bias features ...\n",
      "number of ebias features =35879\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35879,)\n",
      "len of dfs: 35879\n",
      "the vocabulary size =:35879\n",
      "time passed:26220.5192861557\n",
      "DC0:base entropy=-1.3856545288939623, DC0:basete troenpy=-0.287485459875361\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =35879\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 39 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36388,)\n",
      "len of dfs: 36388\n",
      "the vocabulary size =:36388\n",
      "time passed:26282.844622135162\n",
      "DC0:base entropy=-1.3856104170622268, DC0:basete troenpy=-0.28751968063282995\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =36388\n",
      "fle max=2.7673904471052317\n",
      "normalizing bias features ...\n",
      "number of ebias features =36388\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36388,)\n",
      "len of dfs: 36388\n",
      "the vocabulary size =:36388\n",
      "time passed:26344.602828264236\n",
      "DC0:base entropy=-1.3856104170622268, DC0:basete troenpy=-0.28751968063282995\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36388\n",
      "fle max=10000.721755670464\n",
      "normalizing bias features ...\n",
      "number of ebias features =36388\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36388,)\n",
      "len of dfs: 36388\n",
      "the vocabulary size =:36388\n",
      "time passed:26405.415025949478\n",
      "DC0:base entropy=-1.3856104170622268, DC0:basete troenpy=-0.28751968063282995\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36388\n",
      "fle max=88.58255681278186\n",
      "normalizing bias features ...\n",
      "number of ebias features =36388\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36388,)\n",
      "len of dfs: 36388\n",
      "the vocabulary size =:36388\n",
      "time passed:26467.33660006523\n",
      "DC0:base entropy=-1.3856104170622268, DC0:basete troenpy=-0.28751968063282995\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36388\n",
      "fle max=4.80930825768187\n",
      "normalizing bias features ...\n",
      "number of ebias features =36388\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36388,)\n",
      "len of dfs: 36388\n",
      "the vocabulary size =:36388\n",
      "time passed:26528.834082126617\n",
      "DC0:base entropy=-1.3856104170622268, DC0:basete troenpy=-0.28751968063282995\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =36388\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 40 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36304,)\n",
      "len of dfs: 36304\n",
      "the vocabulary size =:36304\n",
      "time passed:26592.155747175217\n",
      "DC0:base entropy=-1.385658607056569, DC0:basete troenpy=-0.28748226097888196\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36304\n",
      "fle max=2.767293592129646\n",
      "normalizing bias features ...\n",
      "number of ebias features =36304\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36304,)\n",
      "len of dfs: 36304\n",
      "the vocabulary size =:36304\n",
      "time passed:26653.315654993057\n",
      "DC0:base entropy=-1.385658607056569, DC0:basete troenpy=-0.28748226097888196\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36304\n",
      "fle max=10000.721730567662\n",
      "normalizing bias features ...\n",
      "number of ebias features =36304\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36304,)\n",
      "len of dfs: 36304\n",
      "the vocabulary size =:36304\n",
      "time passed:26715.14774107933\n",
      "DC0:base entropy=-1.385658607056569, DC0:basete troenpy=-0.28748226097888196\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36304\n",
      "fle max=142.31263452099802\n",
      "normalizing bias features ...\n",
      "number of ebias features =36304\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36304,)\n",
      "len of dfs: 36304\n",
      "the vocabulary size =:36304\n",
      "time passed:26779.27251315117\n",
      "DC0:base entropy=-1.385658607056569, DC0:basete troenpy=-0.28748226097888196\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36304\n",
      "fle max=4.753390379633466\n",
      "normalizing bias features ...\n",
      "number of ebias features =36304\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36304,)\n",
      "len of dfs: 36304\n",
      "the vocabulary size =:36304\n",
      "time passed:26846.486242055893\n",
      "DC0:base entropy=-1.385658607056569, DC0:basete troenpy=-0.28748226097888196\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =36304\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 41 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35969,)\n",
      "len of dfs: 35969\n",
      "the vocabulary size =:35969\n",
      "time passed:26916.016477108\n",
      "DC0:base entropy=-1.3855524701091841, DC0:basete troenpy=-0.2875652784680388\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35969\n",
      "fle max=2.7671831172183206\n",
      "normalizing bias features ...\n",
      "number of ebias features =35969\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35969,)\n",
      "len of dfs: 35969\n",
      "the vocabulary size =:35969\n",
      "time passed:26983.477912902832\n",
      "DC0:base entropy=-1.3855524701091841, DC0:basete troenpy=-0.2875652784680388\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35969\n",
      "fle max=10000.721785858104\n",
      "normalizing bias features ...\n",
      "number of ebias features =35969\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35969,)\n",
      "len of dfs: 35969\n",
      "the vocabulary size =:35969\n",
      "time passed:27050.693652153015\n",
      "DC0:base entropy=-1.3855524701091841, DC0:basete troenpy=-0.2875652784680388\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35969\n",
      "fle max=93.56170379045858\n",
      "normalizing bias features ...\n",
      "number of ebias features =35969\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35969,)\n",
      "len of dfs: 35969\n",
      "the vocabulary size =:35969\n",
      "time passed:27118.30202293396\n",
      "DC0:base entropy=-1.3855524701091841, DC0:basete troenpy=-0.2875652784680388\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35969\n",
      "fle max=4.753473397122622\n",
      "normalizing bias features ...\n",
      "number of ebias features =35969\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35969,)\n",
      "len of dfs: 35969\n",
      "the vocabulary size =:35969\n",
      "time passed:27186.20538711548\n",
      "DC0:base entropy=-1.3855524701091841, DC0:basete troenpy=-0.2875652784680388\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =35969\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 42 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36067,)\n",
      "len of dfs: 36067\n",
      "the vocabulary size =:36067\n",
      "time passed:27256.002504110336\n",
      "DC0:base entropy=-1.3856524819221847, DC0:basete troenpy=-0.28748705612042513\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36067\n",
      "fle max=2.765413997236585\n",
      "normalizing bias features ...\n",
      "number of ebias features =36067\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36067,)\n",
      "len of dfs: 36067\n",
      "the vocabulary size =:36067\n",
      "time passed:27324.334336042404\n",
      "DC0:base entropy=-1.3856524819221847, DC0:basete troenpy=-0.28748705612042513\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36067\n",
      "fle max=10000.721733758228\n",
      "normalizing bias features ...\n",
      "number of ebias features =36067\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36067,)\n",
      "len of dfs: 36067\n",
      "the vocabulary size =:36067\n",
      "time passed:27392.44753599167\n",
      "DC0:base entropy=-1.3856524819221847, DC0:basete troenpy=-0.28748705612042513\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36067\n",
      "fle max=88.58258665919253\n",
      "normalizing bias features ...\n",
      "number of ebias features =36067\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36067,)\n",
      "len of dfs: 36067\n",
      "the vocabulary size =:36067\n",
      "time passed:27460.733387231827\n",
      "DC0:base entropy=-1.3856524819221847, DC0:basete troenpy=-0.28748705612042513\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36067\n",
      "fle max=4.841363947720966\n",
      "normalizing bias features ...\n",
      "number of ebias features =36067\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36067,)\n",
      "len of dfs: 36067\n",
      "the vocabulary size =:36067\n",
      "time passed:27528.968043088913\n",
      "DC0:base entropy=-1.3856524819221847, DC0:basete troenpy=-0.28748705612042513\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =36067\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 43 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36134,)\n",
      "len of dfs: 36134\n",
      "the vocabulary size =:36134\n",
      "time passed:27598.931955099106\n",
      "DC0:base entropy=-1.3856554161991812, DC0:basete troenpy=-0.2874847798787178\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36134\n",
      "fle max=2.7677217125995313\n",
      "normalizing bias features ...\n",
      "number of ebias features =36134\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36134,)\n",
      "len of dfs: 36134\n",
      "the vocabulary size =:36134\n",
      "time passed:27667.472343206406\n",
      "DC0:base entropy=-1.3856554161991812, DC0:basete troenpy=-0.2874847798787178\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36134\n",
      "fle max=10000.721732229767\n",
      "normalizing bias features ...\n",
      "number of ebias features =36134\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36134,)\n",
      "len of dfs: 36134\n",
      "the vocabulary size =:36134\n",
      "time passed:27736.161238193512\n",
      "DC0:base entropy=-1.3856554161991812, DC0:basete troenpy=-0.2874847798787178\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36134\n",
      "fle max=86.59066691228116\n",
      "normalizing bias features ...\n",
      "number of ebias features =36134\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36134,)\n",
      "len of dfs: 36134\n",
      "the vocabulary size =:36134\n",
      "time passed:27804.416721105576\n",
      "DC0:base entropy=-1.3856554161991812, DC0:basete troenpy=-0.2874847798787178\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36134\n",
      "fle max=4.776121149610858\n",
      "normalizing bias features ...\n",
      "number of ebias features =36134\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36134,)\n",
      "len of dfs: 36134\n",
      "the vocabulary size =:36134\n",
      "time passed:27872.883311986923\n",
      "DC0:base entropy=-1.3856554161991812, DC0:basete troenpy=-0.2874847798787178\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =36134\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 44 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35914,)\n",
      "len of dfs: 35914\n",
      "the vocabulary size =:35914\n",
      "time passed:27942.558402061462\n",
      "DC0:base entropy=-1.3856546182534315, DC0:basete troenpy=-0.2874853805575057\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35914\n",
      "fle max=2.7661003242197095\n",
      "normalizing bias features ...\n",
      "number of ebias features =35914\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35914,)\n",
      "len of dfs: 35914\n",
      "the vocabulary size =:35914\n",
      "time passed:28010.46092915535\n",
      "DC0:base entropy=-1.3856546182534315, DC0:basete troenpy=-0.2874853805575057\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35914\n",
      "fle max=10000.721732645416\n",
      "normalizing bias features ...\n",
      "number of ebias features =35914\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35914,)\n",
      "len of dfs: 35914\n",
      "the vocabulary size =:35914\n",
      "time passed:28078.700605154037\n",
      "DC0:base entropy=-1.3856546182534315, DC0:basete troenpy=-0.2874853805575057\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35914\n",
      "fle max=148.27664256421545\n",
      "normalizing bias features ...\n",
      "number of ebias features =35914\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35914,)\n",
      "len of dfs: 35914\n",
      "the vocabulary size =:35914\n",
      "time passed:28148.02125597\n",
      "DC0:base entropy=-1.3856546182534315, DC0:basete troenpy=-0.2874853805575057\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =35914\n",
      "fle max=4.776121750289645\n",
      "normalizing bias features ...\n",
      "number of ebias features =35914\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35914,)\n",
      "len of dfs: 35914\n",
      "the vocabulary size =:35914\n",
      "time passed:28215.899017095566\n",
      "DC0:base entropy=-1.3856546182534315, DC0:basete troenpy=-0.2874853805575057\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =35914\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 45 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36383,)\n",
      "len of dfs: 36383\n",
      "the vocabulary size =:36383\n",
      "time passed:28285.981223106384\n",
      "DC0:base entropy=-1.3856105269383634, DC0:basete troenpy=-0.2875197253726206\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36383\n",
      "fle max=2.7664799330033794\n",
      "normalizing bias features ...\n",
      "number of ebias features =36383\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36383,)\n",
      "len of dfs: 36383\n",
      "the vocabulary size =:36383\n",
      "time passed:28354.797915935516\n",
      "DC0:base entropy=-1.3856105269383634, DC0:basete troenpy=-0.2875197253726206\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36383\n",
      "fle max=10000.721755613225\n",
      "normalizing bias features ...\n",
      "number of ebias features =36383\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36383,)\n",
      "len of dfs: 36383\n",
      "the vocabulary size =:36383\n",
      "time passed:28423.605951070786\n",
      "DC0:base entropy=-1.3856105269383634, DC0:basete troenpy=-0.2875197253726206\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36383\n",
      "fle max=87.5866135483248\n",
      "normalizing bias features ...\n",
      "number of ebias features =36383\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36383,)\n",
      "len of dfs: 36383\n",
      "the vocabulary size =:36383\n",
      "time passed:28492.595268011093\n",
      "DC0:base entropy=-1.3856105269383634, DC0:basete troenpy=-0.2875197253726206\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36383\n",
      "fle max=4.809308302421661\n",
      "normalizing bias features ...\n",
      "number of ebias features =36383\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36383,)\n",
      "len of dfs: 36383\n",
      "the vocabulary size =:36383\n",
      "time passed:28556.05173110962\n",
      "DC0:base entropy=-1.3856105269383634, DC0:basete troenpy=-0.2875197253726206\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =36383\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 46 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35869,)\n",
      "len of dfs: 35869\n",
      "the vocabulary size =:35869\n",
      "time passed:28621.046409130096\n",
      "DC0:base entropy=-1.385641887354899, DC0:basete troenpy=-0.2874953538085684\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35869\n",
      "fle max=2.76696136655337\n",
      "normalizing bias features ...\n",
      "number of ebias features =35869\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35869,)\n",
      "len of dfs: 35869\n",
      "the vocabulary size =:35869\n",
      "time passed:28683.23161315918\n",
      "DC0:base entropy=-1.385641887354899, DC0:basete troenpy=-0.2874953538085684\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35869\n",
      "fle max=10000.721739276976\n",
      "normalizing bias features ...\n",
      "number of ebias features =35869\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35869,)\n",
      "len of dfs: 35869\n",
      "the vocabulary size =:35869\n",
      "time passed:28744.692227125168\n",
      "DC0:base entropy=-1.385641887354899, DC0:basete troenpy=-0.2874953538085684\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35869\n",
      "fle max=88.5825790838564\n",
      "normalizing bias features ...\n",
      "number of ebias features =35869\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35869,)\n",
      "len of dfs: 35869\n",
      "the vocabulary size =:35869\n",
      "time passed:28805.656103134155\n",
      "DC0:base entropy=-1.385641887354899, DC0:basete troenpy=-0.2874953538085684\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =35869\n",
      "fle max=4.787305024138833\n",
      "normalizing bias features ...\n",
      "number of ebias features =35869\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(35869,)\n",
      "len of dfs: 35869\n",
      "the vocabulary size =:35869\n",
      "time passed:28867.067645072937\n",
      "DC0:base entropy=-1.385641887354899, DC0:basete troenpy=-0.2874953538085684\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =35869\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 47 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36167,)\n",
      "len of dfs: 36167\n",
      "the vocabulary size =:36167\n",
      "time passed:28929.281585931778\n",
      "DC0:base entropy=-1.3855900553946676, DC0:basete troenpy=-0.2875360355057532\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36167\n",
      "fle max=2.766844916449153\n",
      "normalizing bias features ...\n",
      "number of ebias features =36167\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36167,)\n",
      "len of dfs: 36167\n",
      "the vocabulary size =:36167\n",
      "time passed:28991.437610149384\n",
      "DC0:base entropy=-1.3855900553946676, DC0:basete troenpy=-0.2875360355057532\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36167\n",
      "fle max=10000.721766277647\n",
      "normalizing bias features ...\n",
      "number of ebias features =36167\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36167,)\n",
      "len of dfs: 36167\n",
      "the vocabulary size =:36167\n",
      "time passed:29052.82211303711\n",
      "DC0:base entropy=-1.3855900553946676, DC0:basete troenpy=-0.2875360355057532\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =36167\n",
      "fle max=86.59062012945074\n",
      "normalizing bias features ...\n",
      "number of ebias features =36167\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36167,)\n",
      "len of dfs: 36167\n",
      "the vocabulary size =:36167\n",
      "time passed:29114.128397226334\n",
      "DC0:base entropy=-1.3855900553946676, DC0:basete troenpy=-0.2875360355057532\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36167\n",
      "fle max=4.830830817775757\n",
      "normalizing bias features ...\n",
      "number of ebias features =36167\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36167,)\n",
      "len of dfs: 36167\n",
      "the vocabulary size =:36167\n",
      "time passed:29175.333502054214\n",
      "DC0:base entropy=-1.3855900553946676, DC0:basete troenpy=-0.2875360355057532\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =36167\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 48 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36048,)\n",
      "len of dfs: 36048\n",
      "the vocabulary size =:36048\n",
      "time passed:29238.297380924225\n",
      "DC0:base entropy=-1.385653432574299, DC0:basete troenpy=-0.28748628618353184\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36048\n",
      "fle max=2.766843516140985\n",
      "normalizing bias features ...\n",
      "number of ebias features =36048\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36048,)\n",
      "len of dfs: 36048\n",
      "the vocabulary size =:36048\n",
      "time passed:29299.389609098434\n",
      "DC0:base entropy=-1.385653432574299, DC0:basete troenpy=-0.28748628618353184\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36048\n",
      "fle max=10000.721733263033\n",
      "normalizing bias features ...\n",
      "number of ebias features =36048\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36048,)\n",
      "len of dfs: 36048\n",
      "the vocabulary size =:36048\n",
      "time passed:29361.036620140076\n",
      "DC0:base entropy=-1.385653432574299, DC0:basete troenpy=-0.28748628618353184\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36048\n",
      "fle max=147.28272115856703\n",
      "normalizing bias features ...\n",
      "number of ebias features =36048\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36048,)\n",
      "len of dfs: 36048\n",
      "the vocabulary size =:36048\n",
      "time passed:29422.680229902267\n",
      "DC0:base entropy=-1.385653432574299, DC0:basete troenpy=-0.28748628618353184\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36048\n",
      "fle max=4.730137542673848\n",
      "normalizing bias features ...\n",
      "number of ebias features =36048\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36048,)\n",
      "len of dfs: 36048\n",
      "the vocabulary size =:36048\n",
      "time passed:29483.643523216248\n",
      "DC0:base entropy=-1.385653432574299, DC0:basete troenpy=-0.28748628618353184\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =36048\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 49 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_amazon_by_line.txt\n",
      "kitchen--->0\n",
      "books--->1\n",
      "dvd--->2\n",
      "electronics--->3\n",
      "8000\n",
      "1600\n",
      "6400\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36151,)\n",
      "len of dfs: 36151\n",
      "the vocabulary size =:36151\n",
      "time passed:29547.111819028854\n",
      "DC0:base entropy=-1.3855870670862371, DC0:basete troenpy=-0.2875379413239145\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36151\n",
      "fle max=2.7656671782495375\n",
      "normalizing bias features ...\n",
      "number of ebias features =36151\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36151,)\n",
      "len of dfs: 36151\n",
      "the vocabulary size =:36151\n",
      "time passed:29610.57644701004\n",
      "DC0:base entropy=-1.3855870670862371, DC0:basete troenpy=-0.2875379413239145\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36151\n",
      "fle max=10000.7217678344\n",
      "normalizing bias features ...\n",
      "number of ebias features =36151\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36151,)\n",
      "len of dfs: 36151\n",
      "the vocabulary size =:36151\n",
      "time passed:29675.95903491974\n",
      "DC0:base entropy=-1.3855870670862371, DC0:basete troenpy=-0.2875379413239145\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36151\n",
      "fle max=151.2581618890439\n",
      "normalizing bias features ...\n",
      "number of ebias features =36151\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36151,)\n",
      "len of dfs: 36151\n",
      "the vocabulary size =:36151\n",
      "time passed:29742.483938217163\n",
      "DC0:base entropy=-1.3855870670862371, DC0:basete troenpy=-0.2875379413239145\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =36151\n",
      "fle max=4.776174311056054\n",
      "normalizing bias features ...\n",
      "number of ebias features =36151\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 6400 docs.\n",
      "shape of dfs:(36151,)\n",
      "len of dfs: 36151\n",
      "the vocabulary size =:36151\n",
      "time passed:29810.59873700142\n",
      "DC0:base entropy=-1.3855870670862371, DC0:basete troenpy=-0.2875379413239145\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =36151\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n"
     ]
    }
   ],
   "source": [
    "datanmlist=[bbcfn,twfn,amazonfn,classfn]\n",
    "#dfbbc=repeatSampling_knn(bbcfn, Nsample=50)\n",
    "dfamazon=repeatSampling_knn_rncf(amazonfn, Nsample=50)\n",
    "#dftw=repeatSampling_knn(twfn, Nsample=50)\n",
    "#dfclass=repeatSampling_knn(classfn, Nsample=50)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "94a86f44",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " ... 0 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12033,)\n",
      "len of dfs: 12033\n",
      "the vocabulary size =:12033\n",
      "time passed:9444.660082101822\n",
      "DC0:base entropy=-1.519881153818394, DC0:basete troenpy=-0.2765890314466325\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12033\n",
      "fle max=10000.657989472687\n",
      "normalizing bias features ...\n",
      "number of ebias features =12033\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 1 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11760,)\n",
      "len of dfs: 11760\n",
      "the vocabulary size =:11760\n",
      "time passed:9448.8377161026\n",
      "DC0:base entropy=-1.5283824190388045, DC0:basete troenpy=-0.27053892421493153\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11760\n",
      "fle max=10000.654329322606\n",
      "normalizing bias features ...\n",
      "number of ebias features =11760\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 2 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11799,)\n",
      "len of dfs: 11799\n",
      "the vocabulary size =:11799\n",
      "time passed:9452.628380060196\n",
      "DC0:base entropy=-1.5272383398218483, DC0:basete troenpy=-0.27051845723552187\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11799\n",
      "fle max=10000.6548195235\n",
      "normalizing bias features ...\n",
      "number of ebias features =11799\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 3 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11831,)\n",
      "len of dfs: 11831\n",
      "the vocabulary size =:11831\n",
      "time passed:9456.39665222168\n",
      "DC0:base entropy=-1.5143827123326123, DC0:basete troenpy=-0.2827691625441157\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11831\n",
      "fle max=10000.66037866764\n",
      "normalizing bias features ...\n",
      "number of ebias features =11831\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 4 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11935,)\n",
      "len of dfs: 11935\n",
      "the vocabulary size =:11935\n",
      "time passed:9460.165724992752\n",
      "DC0:base entropy=-1.517674784952863, DC0:basete troenpy=-0.27795200089665195\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11935\n",
      "fle max=10000.658946109223\n",
      "normalizing bias features ...\n",
      "number of ebias features =11935\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 5 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11946,)\n",
      "len of dfs: 11946\n",
      "the vocabulary size =:11946\n",
      "time passed:9464.048040151596\n",
      "DC0:base entropy=-1.5283400631361428, DC0:basete troenpy=-0.2696609774364784\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11946\n",
      "fle max=10000.654347457656\n",
      "normalizing bias features ...\n",
      "number of ebias features =11946\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 6 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11989,)\n",
      "len of dfs: 11989\n",
      "the vocabulary size =:11989\n",
      "time passed:9467.961478948593\n",
      "DC0:base entropy=-1.5263518913588958, DC0:basete troenpy=-0.27128681442402697\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11989\n",
      "fle max=10000.655199843264\n",
      "normalizing bias features ...\n",
      "number of ebias features =11989\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 7 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11762,)\n",
      "len of dfs: 11762\n",
      "the vocabulary size =:11762\n",
      "time passed:9471.939637184143\n",
      "DC0:base entropy=-1.5310260012027401, DC0:basete troenpy=-0.26802618551899593\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11762\n",
      "fle max=10000.653199435645\n",
      "normalizing bias features ...\n",
      "number of ebias features =11762\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 8 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11999,)\n",
      "len of dfs: 11999\n",
      "the vocabulary size =:11999\n",
      "time passed:9475.788377046585\n",
      "DC0:base entropy=-1.5309194935349337, DC0:basete troenpy=-0.26853913572769206\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11999\n",
      "fle max=10000.65324488238\n",
      "normalizing bias features ...\n",
      "number of ebias features =11999\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 9 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11697,)\n",
      "len of dfs: 11697\n",
      "the vocabulary size =:11697\n",
      "time passed:9479.676115989685\n",
      "DC0:base entropy=-1.5168529575787868, DC0:basete troenpy=-0.27721000932270023\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11697\n",
      "fle max=10000.659303148217\n",
      "normalizing bias features ...\n",
      "number of ebias features =11697\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 10 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11858,)\n",
      "len of dfs: 11858\n",
      "the vocabulary size =:11858\n",
      "time passed:9483.44408607483\n",
      "DC0:base entropy=-1.5335825740128495, DC0:basete troenpy=-0.267743171790736\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =11858\n",
      "fle max=10000.652110442561\n",
      "normalizing bias features ...\n",
      "number of ebias features =11858\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 11 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11888,)\n",
      "len of dfs: 11888\n",
      "the vocabulary size =:11888\n",
      "time passed:9487.231313943863\n",
      "DC0:base entropy=-1.5284203099375104, DC0:basete troenpy=-0.26959171895449824\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11888\n",
      "fle max=10000.654313100138\n",
      "normalizing bias features ...\n",
      "number of ebias features =11888\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 12 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11824,)\n",
      "len of dfs: 11824\n",
      "the vocabulary size =:11824\n",
      "time passed:9491.087249040604\n",
      "DC0:base entropy=-1.5367863524039924, DC0:basete troenpy=-0.2642186873750922\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11824\n",
      "fle max=10000.65075088253\n",
      "normalizing bias features ...\n",
      "number of ebias features =11824\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 13 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11976,)\n",
      "len of dfs: 11976\n",
      "the vocabulary size =:11976\n",
      "time passed:9494.908266067505\n",
      "DC0:base entropy=-1.5165857384088142, DC0:basete troenpy=-0.2790483128525679\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11976\n",
      "fle max=10000.659419323685\n",
      "normalizing bias features ...\n",
      "number of ebias features =11976\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 14 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11963,)\n",
      "len of dfs: 11963\n",
      "the vocabulary size =:11963\n",
      "time passed:9498.795362234116\n",
      "DC0:base entropy=-1.530452725259926, DC0:basete troenpy=-0.2690365682777819\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11963\n",
      "fle max=10000.653444126634\n",
      "normalizing bias features ...\n",
      "number of ebias features =11963\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 15 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11976,)\n",
      "len of dfs: 11976\n",
      "the vocabulary size =:11976\n",
      "time passed:9502.63449716568\n",
      "DC0:base entropy=-1.5378323655803905, DC0:basete troenpy=-0.2641170063888836\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11976\n",
      "fle max=10000.650308221628\n",
      "normalizing bias features ...\n",
      "number of ebias features =11976\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 16 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12040,)\n",
      "len of dfs: 12040\n",
      "the vocabulary size =:12040\n",
      "time passed:9506.515596151352\n",
      "DC0:base entropy=-1.5269158331139883, DC0:basete troenpy=-0.27252987444783816\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12040\n",
      "fle max=10000.654957839912\n",
      "normalizing bias features ...\n",
      "number of ebias features =12040\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 17 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11910,)\n",
      "len of dfs: 11910\n",
      "the vocabulary size =:11910\n",
      "time passed:9510.459784030914\n",
      "DC0:base entropy=-1.5325476219036553, DC0:basete troenpy=-0.2673580492140625\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11910\n",
      "fle max=10000.65255085114\n",
      "normalizing bias features ...\n",
      "number of ebias features =11910\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 18 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11916,)\n",
      "len of dfs: 11916\n",
      "the vocabulary size =:11916\n",
      "time passed:9514.291818141937\n",
      "DC0:base entropy=-1.5346423481749065, DC0:basete troenpy=-0.26513980551216404\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11916\n",
      "fle max=10000.651660086924\n",
      "normalizing bias features ...\n",
      "number of ebias features =11916\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 19 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11898,)\n",
      "len of dfs: 11898\n",
      "the vocabulary size =:11898\n",
      "time passed:9518.248497962952\n",
      "DC0:base entropy=-1.514720896335218, DC0:basete troenpy=-0.2798615030020432\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11898\n",
      "fle max=10000.660231218531\n",
      "normalizing bias features ...\n",
      "number of ebias features =11898\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 20 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11809,)\n",
      "len of dfs: 11809\n",
      "the vocabulary size =:11809\n",
      "time passed:9522.194401025772\n",
      "DC0:base entropy=-1.5344612967093891, DC0:basete troenpy=-0.265725901467477\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11809\n",
      "fle max=10000.651736981468\n",
      "normalizing bias features ...\n",
      "number of ebias features =11809\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 21 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11753,)\n",
      "len of dfs: 11753\n",
      "the vocabulary size =:11753\n",
      "time passed:9526.012120246887\n",
      "DC0:base entropy=-1.5238435624783095, DC0:basete troenpy=-0.2739400598148661\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =11753\n",
      "fle max=10000.656278408405\n",
      "normalizing bias features ...\n",
      "number of ebias features =11753\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 22 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11914,)\n",
      "len of dfs: 11914\n",
      "the vocabulary size =:11914\n",
      "time passed:9529.836796045303\n",
      "DC0:base entropy=-1.5076532458103407, DC0:basete troenpy=-0.2854071899627155\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11914\n",
      "fle max=10000.66332648799\n",
      "normalizing bias features ...\n",
      "number of ebias features =11914\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 23 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11824,)\n",
      "len of dfs: 11824\n",
      "the vocabulary size =:11824\n",
      "time passed:9533.734714984894\n",
      "DC0:base entropy=-1.5250633475259248, DC0:basete troenpy=-0.2718476361715477\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11824\n",
      "fle max=10000.655753465566\n",
      "normalizing bias features ...\n",
      "number of ebias features =11824\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 24 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11820,)\n",
      "len of dfs: 11820\n",
      "the vocabulary size =:11820\n",
      "time passed:9537.615784168243\n",
      "DC0:base entropy=-1.533810238762621, DC0:basete troenpy=-0.2659450075643933\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11820\n",
      "fle max=10000.652013642946\n",
      "normalizing bias features ...\n",
      "number of ebias features =11820\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 25 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11921,)\n",
      "len of dfs: 11921\n",
      "the vocabulary size =:11921\n",
      "time passed:9541.395245075226\n",
      "DC0:base entropy=-1.525025118930144, DC0:basete troenpy=-0.27180222288674705\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11921\n",
      "fle max=10000.655769904755\n",
      "normalizing bias features ...\n",
      "number of ebias features =11921\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 26 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11733,)\n",
      "len of dfs: 11733\n",
      "the vocabulary size =:11733\n",
      "time passed:9545.19446516037\n",
      "DC0:base entropy=-1.51223135125111, DC0:basete troenpy=-0.28249453610974656\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11733\n",
      "fle max=10000.661318210996\n",
      "normalizing bias features ...\n",
      "number of ebias features =11733\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 27 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11980,)\n",
      "len of dfs: 11980\n",
      "the vocabulary size =:11980\n",
      "time passed:9548.955919027328\n",
      "DC0:base entropy=-1.5143443910578624, DC0:basete troenpy=-0.28047152888503146\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11980\n",
      "fle max=10000.66039537997\n",
      "normalizing bias features ...\n",
      "number of ebias features =11980\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 28 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11915,)\n",
      "len of dfs: 11915\n",
      "the vocabulary size =:11915\n",
      "time passed:9552.913101196289\n",
      "DC0:base entropy=-1.5260609491468535, DC0:basete troenpy=-0.2709570419671556\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11915\n",
      "fle max=10000.655324764739\n",
      "normalizing bias features ...\n",
      "number of ebias features =11915\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 29 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11870,)\n",
      "len of dfs: 11870\n",
      "the vocabulary size =:11870\n",
      "time passed:9556.815176963806\n",
      "DC0:base entropy=-1.5267705729095102, DC0:basete troenpy=-0.2716401780238468\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11870\n",
      "fle max=10000.655020158078\n",
      "normalizing bias features ...\n",
      "number of ebias features =11870\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 30 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11901,)\n",
      "len of dfs: 11901\n",
      "the vocabulary size =:11901\n",
      "time passed:9560.637027978897\n",
      "DC0:base entropy=-1.5376645485559295, DC0:basete troenpy=-0.26340211899812394\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11901\n",
      "fle max=10000.650379199325\n",
      "normalizing bias features ...\n",
      "number of ebias features =11901\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 31 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11905,)\n",
      "len of dfs: 11905\n",
      "the vocabulary size =:11905\n",
      "time passed:9564.444737195969\n",
      "DC0:base entropy=-1.511831343206949, DC0:basete troenpy=-0.2825441019743885\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11905\n",
      "fle max=10000.661493197515\n",
      "normalizing bias features ...\n",
      "number of ebias features =11905\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 32 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11773,)\n",
      "len of dfs: 11773\n",
      "the vocabulary size =:11773\n",
      "time passed:9568.29539012909\n",
      "DC0:base entropy=-1.5208860696812383, DC0:basete troenpy=-0.2757216264421778\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =11773\n",
      "fle max=10000.657554681711\n",
      "normalizing bias features ...\n",
      "number of ebias features =11773\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 33 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12042,)\n",
      "len of dfs: 12042\n",
      "the vocabulary size =:12042\n",
      "time passed:9572.091017007828\n",
      "DC0:base entropy=-1.5245539731006066, DC0:basete troenpy=-0.27298958774621607\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12042\n",
      "fle max=10000.655972576178\n",
      "normalizing bias features ...\n",
      "number of ebias features =12042\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 34 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11936,)\n",
      "len of dfs: 11936\n",
      "the vocabulary size =:11936\n",
      "time passed:9575.941390275955\n",
      "DC0:base entropy=-1.5242697269135195, DC0:basete troenpy=-0.27176619185167245\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11936\n",
      "fle max=10000.65609491013\n",
      "normalizing bias features ...\n",
      "number of ebias features =11936\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 35 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11831,)\n",
      "len of dfs: 11831\n",
      "the vocabulary size =:11831\n",
      "time passed:9579.81760597229\n",
      "DC0:base entropy=-1.5187765219550853, DC0:basete troenpy=-0.2775727487570145\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11831\n",
      "fle max=10000.658468071077\n",
      "normalizing bias features ...\n",
      "number of ebias features =11831\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 36 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11923,)\n",
      "len of dfs: 11923\n",
      "the vocabulary size =:11923\n",
      "time passed:9583.590306282043\n",
      "DC0:base entropy=-1.5243760601639507, DC0:basete troenpy=-0.27363528348263894\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11923\n",
      "fle max=10000.656049141055\n",
      "normalizing bias features ...\n",
      "number of ebias features =11923\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 37 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11948,)\n",
      "len of dfs: 11948\n",
      "the vocabulary size =:11948\n",
      "time passed:9587.439139127731\n",
      "DC0:base entropy=-1.5221385913692547, DC0:basete troenpy=-0.27462420876942856\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11948\n",
      "fle max=10000.657013564354\n",
      "normalizing bias features ...\n",
      "number of ebias features =11948\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 38 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11970,)\n",
      "len of dfs: 11970\n",
      "the vocabulary size =:11970\n",
      "time passed:9591.25383400917\n",
      "DC0:base entropy=-1.5437295192438363, DC0:basete troenpy=-0.25913372902862064\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11970\n",
      "fle max=10000.647823838255\n",
      "normalizing bias features ...\n",
      "number of ebias features =11970\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 39 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11825,)\n",
      "len of dfs: 11825\n",
      "the vocabulary size =:11825\n",
      "time passed:9595.203721046448\n",
      "DC0:base entropy=-1.5276344620633928, DC0:basete troenpy=-0.26938641025117527\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11825\n",
      "fle max=10000.654649714841\n",
      "normalizing bias features ...\n",
      "number of ebias features =11825\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 40 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11888,)\n",
      "len of dfs: 11888\n",
      "the vocabulary size =:11888\n",
      "time passed:9599.065623044968\n",
      "DC0:base entropy=-1.5281712257599764, DC0:basete troenpy=-0.270166971345089\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11888\n",
      "fle max=10000.654419756842\n",
      "normalizing bias features ...\n",
      "number of ebias features =11888\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 41 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11999,)\n",
      "len of dfs: 11999\n",
      "the vocabulary size =:11999\n",
      "time passed:9602.870915174484\n",
      "DC0:base entropy=-1.5315364153056856, DC0:basete troenpy=-0.26733167310643163\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11999\n",
      "fle max=10000.652981730098\n",
      "normalizing bias features ...\n",
      "number of ebias features =11999\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 42 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11912,)\n",
      "len of dfs: 11912\n",
      "the vocabulary size =:11912\n",
      "time passed:9606.748323917389\n",
      "DC0:base entropy=-1.527273038929278, DC0:basete troenpy=-0.2715094232221116\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11912\n",
      "fle max=10000.654804645255\n",
      "normalizing bias features ...\n",
      "number of ebias features =11912\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 43 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11892,)\n",
      "len of dfs: 11892\n",
      "the vocabulary size =:11892\n",
      "time passed:9610.574756860733\n",
      "DC0:base entropy=-1.5009542053635696, DC0:basete troenpy=-0.2879915901737939\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =11892\n",
      "fle max=10000.666287235914\n",
      "normalizing bias features ...\n",
      "number of ebias features =11892\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 44 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11738,)\n",
      "len of dfs: 11738\n",
      "the vocabulary size =:11738\n",
      "time passed:9614.402117013931\n",
      "DC0:base entropy=-1.5289156223200533, DC0:basete troenpy=-0.269351177516755\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11738\n",
      "fle max=10000.654101112914\n",
      "normalizing bias features ...\n",
      "number of ebias features =11738\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 45 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12073,)\n",
      "len of dfs: 12073\n",
      "the vocabulary size =:12073\n",
      "time passed:9618.232430934906\n",
      "DC0:base entropy=-1.5244546513534798, DC0:basete troenpy=-0.27130384101988275\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12073\n",
      "fle max=10000.656015317112\n",
      "normalizing bias features ...\n",
      "number of ebias features =12073\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 46 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12053,)\n",
      "len of dfs: 12053\n",
      "the vocabulary size =:12053\n",
      "time passed:9622.244764089584\n",
      "DC0:base entropy=-1.5126247690238541, DC0:basete troenpy=-0.28176917484643565\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12053\n",
      "fle max=10000.66114619772\n",
      "normalizing bias features ...\n",
      "number of ebias features =12053\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 47 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11878,)\n",
      "len of dfs: 11878\n",
      "the vocabulary size =:11878\n",
      "time passed:9626.263444185257\n",
      "DC0:base entropy=-1.5249494459287234, DC0:basete troenpy=-0.2718417449287401\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11878\n",
      "fle max=10000.655802448347\n",
      "normalizing bias features ...\n",
      "number of ebias features =11878\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 48 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12009,)\n",
      "len of dfs: 12009\n",
      "the vocabulary size =:12009\n",
      "time passed:9630.236943006516\n",
      "DC0:base entropy=-1.521525480528905, DC0:basete troenpy=-0.27582745355354854\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12009\n",
      "fle max=10000.657278330616\n",
      "normalizing bias features ...\n",
      "number of ebias features =12009\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 49 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11899,)\n",
      "len of dfs: 11899\n",
      "the vocabulary size =:11899\n",
      "time passed:9634.22509598732\n",
      "DC0:base entropy=-1.531030048527057, DC0:basete troenpy=-0.26703772035897594\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11899\n",
      "fle max=10000.65319770878\n",
      "normalizing bias features ...\n",
      "number of ebias features =11899\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "(0.21414965986394555, 0.03338854236773435)\n"
     ]
    }
   ],
   "source": [
    "dfbbc2=repeatSampling_knn(bbcfn, Nsample=50)\n",
    "df=dfbbc2\n",
    "print((np.mean(df.rncf), np.std(df.rncf)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "394e0540",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "e2426b96",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " ... 0 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11849,)\n",
      "len of dfs: 11849\n",
      "the vocabulary size =:11849\n",
      "time passed:11888.789535999298\n",
      "DC0:base entropy=-1.5207391643830617, DC0:basete troenpy=-0.27618891379434296\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11849\n",
      "fle max=3.08264307080486\n",
      "normalizing bias features ...\n",
      "number of ebias features =11849\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11849,)\n",
      "len of dfs: 11849\n",
      "the vocabulary size =:11849\n",
      "time passed:11892.88344502449\n",
      "DC0:base entropy=-1.5207391643830617, DC0:basete troenpy=-0.27618891379434296\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11849\n",
      "fle max=10000.65761820649\n",
      "normalizing bias features ...\n",
      "number of ebias features =11849\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11849,)\n",
      "len of dfs: 11849\n",
      "the vocabulary size =:11849\n",
      "time passed:11896.918392896652\n",
      "DC0:base entropy=-1.5207391643830617, DC0:basete troenpy=-0.27618891379434296\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11849\n",
      "fle max=34.77483436175216\n",
      "normalizing bias features ...\n",
      "number of ebias features =11849\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11849,)\n",
      "len of dfs: 11849\n",
      "the vocabulary size =:11849\n",
      "time passed:11900.692991018295\n",
      "DC0:base entropy=-1.5207391643830617, DC0:basete troenpy=-0.27618891379434296\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11849\n",
      "fle max=4.606922254080674\n",
      "normalizing bias features ...\n",
      "number of ebias features =11849\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11849,)\n",
      "len of dfs: 11849\n",
      "the vocabulary size =:11849\n",
      "time passed:11904.520290136337\n",
      "DC0:base entropy=-1.5207391643830617, DC0:basete troenpy=-0.27618891379434296\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =11849\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 1 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12086,)\n",
      "len of dfs: 12086\n",
      "the vocabulary size =:12086\n",
      "time passed:11908.459026098251\n",
      "DC0:base entropy=-1.5108450135158789, DC0:basete troenpy=-0.2830411825236576\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12086\n",
      "fle max=3.075606838808184\n",
      "normalizing bias features ...\n",
      "number of ebias features =12086\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12086,)\n",
      "len of dfs: 12086\n",
      "the vocabulary size =:12086\n",
      "time passed:11912.395411968231\n",
      "DC0:base entropy=-1.5108450135158789, DC0:basete troenpy=-0.2830411825236576\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12086\n",
      "fle max=10000.661925070779\n",
      "normalizing bias features ...\n",
      "number of ebias features =12086\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12086,)\n",
      "len of dfs: 12086\n",
      "the vocabulary size =:12086\n",
      "time passed:11916.301497936249\n",
      "DC0:base entropy=-1.5108450135158789, DC0:basete troenpy=-0.2830411825236576\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12086\n",
      "fle max=70.67028091267171\n",
      "normalizing bias features ...\n",
      "number of ebias features =12086\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12086,)\n",
      "len of dfs: 12086\n",
      "the vocabulary size =:12086\n",
      "time passed:11920.236815929413\n",
      "DC0:base entropy=-1.5108450135158789, DC0:basete troenpy=-0.2830411825236576\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12086\n",
      "fle max=4.6005292960599675\n",
      "normalizing bias features ...\n",
      "number of ebias features =12086\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12086,)\n",
      "len of dfs: 12086\n",
      "the vocabulary size =:12086\n",
      "time passed:11924.139563083649\n",
      "DC0:base entropy=-1.5108450135158789, DC0:basete troenpy=-0.2830411825236576\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =12086\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 2 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11791,)\n",
      "len of dfs: 11791\n",
      "the vocabulary size =:11791\n",
      "time passed:11928.118639945984\n",
      "DC0:base entropy=-1.5351541253890186, DC0:basete troenpy=-0.2640394956385098\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11791\n",
      "fle max=3.1007136900027876\n",
      "normalizing bias features ...\n",
      "number of ebias features =11791\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11791,)\n",
      "len of dfs: 11791\n",
      "the vocabulary size =:11791\n",
      "time passed:11932.03173995018\n",
      "DC0:base entropy=-1.5351541253890186, DC0:basete troenpy=-0.2640394956385098\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11791\n",
      "fle max=10000.651442827624\n",
      "normalizing bias features ...\n",
      "number of ebias features =11791\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11791,)\n",
      "len of dfs: 11791\n",
      "the vocabulary size =:11791\n",
      "time passed:11935.95444893837\n",
      "DC0:base entropy=-1.5351541253890186, DC0:basete troenpy=-0.2640394956385098\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11791\n",
      "fle max=31.790368577005033\n",
      "normalizing bias features ...\n",
      "number of ebias features =11791\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11791,)\n",
      "len of dfs: 11791\n",
      "the vocabulary size =:11791\n",
      "time passed:11939.867893218994\n",
      "DC0:base entropy=-1.5351541253890186, DC0:basete troenpy=-0.2640394956385098\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =11791\n",
      "fle max=4.34157693954423\n",
      "normalizing bias features ...\n",
      "number of ebias features =11791\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11791,)\n",
      "len of dfs: 11791\n",
      "the vocabulary size =:11791\n",
      "time passed:11943.847965955734\n",
      "DC0:base entropy=-1.5351541253890186, DC0:basete troenpy=-0.2640394956385098\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =11791\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 3 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11773,)\n",
      "len of dfs: 11773\n",
      "the vocabulary size =:11773\n",
      "time passed:11947.763175964355\n",
      "DC0:base entropy=-1.5316104083089555, DC0:basete troenpy=-0.26683083360783355\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11773\n",
      "fle max=3.0938576367212294\n",
      "normalizing bias features ...\n",
      "number of ebias features =11773\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11773,)\n",
      "len of dfs: 11773\n",
      "the vocabulary size =:11773\n",
      "time passed:11951.548200130463\n",
      "DC0:base entropy=-1.5316104083089555, DC0:basete troenpy=-0.26683083360783355\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11773\n",
      "fle max=10000.652950182104\n",
      "normalizing bias features ...\n",
      "number of ebias features =11773\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11773,)\n",
      "len of dfs: 11773\n",
      "the vocabulary size =:11773\n",
      "time passed:11955.349256038666\n",
      "DC0:base entropy=-1.5316104083089555, DC0:basete troenpy=-0.26683083360783355\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11773\n",
      "fle max=71.67995957205292\n",
      "normalizing bias features ...\n",
      "number of ebias features =11773\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11773,)\n",
      "len of dfs: 11773\n",
      "the vocabulary size =:11773\n",
      "time passed:11959.104543209076\n",
      "DC0:base entropy=-1.5316104083089555, DC0:basete troenpy=-0.26683083360783355\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11773\n",
      "fle max=4.570895926812003\n",
      "normalizing bias features ...\n",
      "number of ebias features =11773\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11773,)\n",
      "len of dfs: 11773\n",
      "the vocabulary size =:11773\n",
      "time passed:11963.003988027573\n",
      "DC0:base entropy=-1.5316104083089555, DC0:basete troenpy=-0.26683083360783355\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =11773\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 4 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11962,)\n",
      "len of dfs: 11962\n",
      "the vocabulary size =:11962\n",
      "time passed:11966.810932159424\n",
      "DC0:base entropy=-1.5218688984749293, DC0:basete troenpy=-0.27361566415375077\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11962\n",
      "fle max=3.088049093358265\n",
      "normalizing bias features ...\n",
      "number of ebias features =11962\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11962,)\n",
      "len of dfs: 11962\n",
      "the vocabulary size =:11962\n",
      "time passed:11970.720370054245\n",
      "DC0:base entropy=-1.5218688984749293, DC0:basete troenpy=-0.27361566415375077\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11962\n",
      "fle max=10000.65713000246\n",
      "normalizing bias features ...\n",
      "number of ebias features =11962\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11962,)\n",
      "len of dfs: 11962\n",
      "the vocabulary size =:11962\n",
      "time passed:11974.551311969757\n",
      "DC0:base entropy=-1.5218688984749293, DC0:basete troenpy=-0.27361566415375077\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11962\n",
      "fle max=33.77867331997049\n",
      "normalizing bias features ...\n",
      "number of ebias features =11962\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11962,)\n",
      "len of dfs: 11962\n",
      "the vocabulary size =:11962\n",
      "time passed:11978.399055957794\n",
      "DC0:base entropy=-1.5218688984749293, DC0:basete troenpy=-0.27361566415375077\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11962\n",
      "fle max=4.52211090620311\n",
      "normalizing bias features ...\n",
      "number of ebias features =11962\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11962,)\n",
      "len of dfs: 11962\n",
      "the vocabulary size =:11962\n",
      "time passed:11982.215999126434\n",
      "DC0:base entropy=-1.5218688984749293, DC0:basete troenpy=-0.27361566415375077\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =11962\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 5 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11898,)\n",
      "len of dfs: 11898\n",
      "the vocabulary size =:11898\n",
      "time passed:11986.057709217072\n",
      "DC0:base entropy=-1.5298913400530973, DC0:basete troenpy=-0.2688859966334115\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11898\n",
      "fle max=3.088926321417854\n",
      "normalizing bias features ...\n",
      "number of ebias features =11898\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11898,)\n",
      "len of dfs: 11898\n",
      "the vocabulary size =:11898\n",
      "time passed:11989.901616096497\n",
      "DC0:base entropy=-1.5298913400530973, DC0:basete troenpy=-0.2688859966334115\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11898\n",
      "fle max=10000.653683920034\n",
      "normalizing bias features ...\n",
      "number of ebias features =11898\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11898,)\n",
      "len of dfs: 11898\n",
      "the vocabulary size =:11898\n",
      "time passed:11993.825237035751\n",
      "DC0:base entropy=-1.5298913400530973, DC0:basete troenpy=-0.2688859966334115\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =11898\n",
      "fle max=36.776544062194525\n",
      "normalizing bias features ...\n",
      "number of ebias features =11898\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11898,)\n",
      "len of dfs: 11898\n",
      "the vocabulary size =:11898\n",
      "time passed:11997.792417049408\n",
      "DC0:base entropy=-1.5298913400530973, DC0:basete troenpy=-0.2688859966334115\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11898\n",
      "fle max=4.559345437781802\n",
      "normalizing bias features ...\n",
      "number of ebias features =11898\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11898,)\n",
      "len of dfs: 11898\n",
      "the vocabulary size =:11898\n",
      "time passed:12001.988635063171\n",
      "DC0:base entropy=-1.5298913400530973, DC0:basete troenpy=-0.2688859966334115\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =11898\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 6 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12066,)\n",
      "len of dfs: 12066\n",
      "the vocabulary size =:12066\n",
      "time passed:12006.150533914566\n",
      "DC0:base entropy=-1.5248273677612945, DC0:basete troenpy=-0.2723708062022256\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12066\n",
      "fle max=3.0944805007923044\n",
      "normalizing bias features ...\n",
      "number of ebias features =12066\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12066,)\n",
      "len of dfs: 12066\n",
      "the vocabulary size =:12066\n",
      "time passed:12010.031361103058\n",
      "DC0:base entropy=-1.5248273677612945, DC0:basete troenpy=-0.2723708062022256\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12066\n",
      "fle max=10000.655854955545\n",
      "normalizing bias features ...\n",
      "number of ebias features =12066\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12066,)\n",
      "len of dfs: 12066\n",
      "the vocabulary size =:12066\n",
      "time passed:12013.962270975113\n",
      "DC0:base entropy=-1.5248273677612945, DC0:basete troenpy=-0.2723708062022256\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12066\n",
      "fle max=34.77782547690073\n",
      "normalizing bias features ...\n",
      "number of ebias features =12066\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12066,)\n",
      "len of dfs: 12066\n",
      "the vocabulary size =:12066\n",
      "time passed:12017.971533060074\n",
      "DC0:base entropy=-1.5248273677612945, DC0:basete troenpy=-0.2723708062022256\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12066\n",
      "fle max=4.6031041464885565\n",
      "normalizing bias features ...\n",
      "number of ebias features =12066\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12066,)\n",
      "len of dfs: 12066\n",
      "the vocabulary size =:12066\n",
      "time passed:12021.879694223404\n",
      "DC0:base entropy=-1.5248273677612945, DC0:basete troenpy=-0.2723708062022256\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =12066\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 7 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11963,)\n",
      "len of dfs: 11963\n",
      "the vocabulary size =:11963\n",
      "time passed:12025.805364131927\n",
      "DC0:base entropy=-1.5225537626134806, DC0:basete troenpy=-0.2729820545307371\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11963\n",
      "fle max=3.091011704249789\n",
      "normalizing bias features ...\n",
      "number of ebias features =11963\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11963,)\n",
      "len of dfs: 11963\n",
      "the vocabulary size =:11963\n",
      "time passed:12029.677167892456\n",
      "DC0:base entropy=-1.5225537626134806, DC0:basete troenpy=-0.2729820545307371\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11963\n",
      "fle max=10000.656834397574\n",
      "normalizing bias features ...\n",
      "number of ebias features =11963\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11963,)\n",
      "len of dfs: 11963\n",
      "the vocabulary size =:11963\n",
      "time passed:12033.811393022537\n",
      "DC0:base entropy=-1.5225537626134806, DC0:basete troenpy=-0.2729820545307371\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11963\n",
      "fle max=29.78680312758179\n",
      "normalizing bias features ...\n",
      "number of ebias features =11963\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11963,)\n",
      "len of dfs: 11963\n",
      "the vocabulary size =:11963\n",
      "time passed:12037.877133131027\n",
      "DC0:base entropy=-1.5225537626134806, DC0:basete troenpy=-0.2729820545307371\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11963\n",
      "fle max=4.5356619315720526\n",
      "normalizing bias features ...\n",
      "number of ebias features =11963\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11963,)\n",
      "len of dfs: 11963\n",
      "the vocabulary size =:11963\n",
      "time passed:12041.965862989426\n",
      "DC0:base entropy=-1.5225537626134806, DC0:basete troenpy=-0.2729820545307371\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =11963\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 8 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11935,)\n",
      "len of dfs: 11935\n",
      "the vocabulary size =:11935\n",
      "time passed:12045.896841049194\n",
      "DC0:base entropy=-1.5063734196556071, DC0:basete troenpy=-0.287841782671694\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11935\n",
      "fle max=3.0633569278975203\n",
      "normalizing bias features ...\n",
      "number of ebias features =11935\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11935,)\n",
      "len of dfs: 11935\n",
      "the vocabulary size =:11935\n",
      "time passed:12049.676693201065\n",
      "DC0:base entropy=-1.5063734196556071, DC0:basete troenpy=-0.287841782671694\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =11935\n",
      "fle max=10000.663890092563\n",
      "normalizing bias features ...\n",
      "number of ebias features =11935\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11935,)\n",
      "len of dfs: 11935\n",
      "the vocabulary size =:11935\n",
      "time passed:12053.64506816864\n",
      "DC0:base entropy=-1.5063734196556071, DC0:basete troenpy=-0.287841782671694\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11935\n",
      "fle max=37.75907808457627\n",
      "normalizing bias features ...\n",
      "number of ebias features =11935\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11935,)\n",
      "len of dfs: 11935\n",
      "the vocabulary size =:11935\n",
      "time passed:12057.62319612503\n",
      "DC0:base entropy=-1.5063734196556071, DC0:basete troenpy=-0.287841782671694\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11935\n",
      "fle max=4.644550609361286\n",
      "normalizing bias features ...\n",
      "number of ebias features =11935\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11935,)\n",
      "len of dfs: 11935\n",
      "the vocabulary size =:11935\n",
      "time passed:12061.677284955978\n",
      "DC0:base entropy=-1.5063734196556071, DC0:basete troenpy=-0.287841782671694\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =11935\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 9 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11740,)\n",
      "len of dfs: 11740\n",
      "the vocabulary size =:11740\n",
      "time passed:12065.671581983566\n",
      "DC0:base entropy=-1.5268685817150478, DC0:basete troenpy=-0.2705667211488177\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11740\n",
      "fle max=3.0960025065579435\n",
      "normalizing bias features ...\n",
      "number of ebias features =11740\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11740,)\n",
      "len of dfs: 11740\n",
      "the vocabulary size =:11740\n",
      "time passed:12069.746729135513\n",
      "DC0:base entropy=-1.5268685817150478, DC0:basete troenpy=-0.2705667211488177\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11740\n",
      "fle max=10000.654978109962\n",
      "normalizing bias features ...\n",
      "number of ebias features =11740\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11740,)\n",
      "len of dfs: 11740\n",
      "the vocabulary size =:11740\n",
      "time passed:12073.71035027504\n",
      "DC0:base entropy=-1.5268685817150478, DC0:basete troenpy=-0.2705667211488177\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11740\n",
      "fle max=36.77509523487963\n",
      "normalizing bias features ...\n",
      "number of ebias features =11740\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11740,)\n",
      "len of dfs: 11740\n",
      "the vocabulary size =:11740\n",
      "time passed:12077.720226049423\n",
      "DC0:base entropy=-1.5268685817150478, DC0:basete troenpy=-0.2705667211488177\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11740\n",
      "fle max=4.533246598190133\n",
      "normalizing bias features ...\n",
      "number of ebias features =11740\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11740,)\n",
      "len of dfs: 11740\n",
      "the vocabulary size =:11740\n",
      "time passed:12081.774132013321\n",
      "DC0:base entropy=-1.5268685817150478, DC0:basete troenpy=-0.2705667211488177\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =11740\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 10 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11913,)\n",
      "len of dfs: 11913\n",
      "the vocabulary size =:11913\n",
      "time passed:12085.802820920944\n",
      "DC0:base entropy=-1.5193828292392186, DC0:basete troenpy=-0.2767389465188773\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11913\n",
      "fle max=3.0910262697678683\n",
      "normalizing bias features ...\n",
      "number of ebias features =11913\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11913,)\n",
      "len of dfs: 11913\n",
      "the vocabulary size =:11913\n",
      "time passed:12089.834917068481\n",
      "DC0:base entropy=-1.5193828292392186, DC0:basete troenpy=-0.2767389465188773\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11913\n",
      "fle max=10000.658205293152\n",
      "normalizing bias features ...\n",
      "number of ebias features =11913\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11913,)\n",
      "len of dfs: 11913\n",
      "the vocabulary size =:11913\n",
      "time passed:12093.797775030136\n",
      "DC0:base entropy=-1.5193828292392186, DC0:basete troenpy=-0.2767389465188773\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11913\n",
      "fle max=35.77225548653381\n",
      "normalizing bias features ...\n",
      "number of ebias features =11913\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11913,)\n",
      "len of dfs: 11913\n",
      "the vocabulary size =:11913\n",
      "time passed:12097.74962592125\n",
      "DC0:base entropy=-1.5193828292392186, DC0:basete troenpy=-0.2767389465188773\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11913\n",
      "fle max=4.539418823560193\n",
      "normalizing bias features ...\n",
      "number of ebias features =11913\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11913,)\n",
      "len of dfs: 11913\n",
      "the vocabulary size =:11913\n",
      "time passed:12101.76069688797\n",
      "DC0:base entropy=-1.5193828292392186, DC0:basete troenpy=-0.2767389465188773\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =11913\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 11 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11888,)\n",
      "len of dfs: 11888\n",
      "the vocabulary size =:11888\n",
      "time passed:12105.850106954575\n",
      "DC0:base entropy=-1.5129167650620194, DC0:basete troenpy=-0.2827993650226365\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =11888\n",
      "fle max=3.0726707172086725\n",
      "normalizing bias features ...\n",
      "number of ebias features =11888\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11888,)\n",
      "len of dfs: 11888\n",
      "the vocabulary size =:11888\n",
      "time passed:12109.883468151093\n",
      "DC0:base entropy=-1.5129167650620194, DC0:basete troenpy=-0.2827993650226365\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11888\n",
      "fle max=10000.661018586716\n",
      "normalizing bias features ...\n",
      "number of ebias features =11888\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11888,)\n",
      "len of dfs: 11888\n",
      "the vocabulary size =:11888\n",
      "time passed:12113.979137182236\n",
      "DC0:base entropy=-1.5129167650620194, DC0:basete troenpy=-0.2827993650226365\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11888\n",
      "fle max=34.76952558330286\n",
      "normalizing bias features ...\n",
      "number of ebias features =11888\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11888,)\n",
      "len of dfs: 11888\n",
      "the vocabulary size =:11888\n",
      "time passed:12117.982976198196\n",
      "DC0:base entropy=-1.5129167650620194, DC0:basete troenpy=-0.2827993650226365\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11888\n",
      "fle max=4.613532705308968\n",
      "normalizing bias features ...\n",
      "number of ebias features =11888\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11888,)\n",
      "len of dfs: 11888\n",
      "the vocabulary size =:11888\n",
      "time passed:12121.873731136322\n",
      "DC0:base entropy=-1.5129167650620194, DC0:basete troenpy=-0.2827993650226365\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =11888\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 12 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11976,)\n",
      "len of dfs: 11976\n",
      "the vocabulary size =:11976\n",
      "time passed:12125.899150848389\n",
      "DC0:base entropy=-1.5213552680136186, DC0:basete troenpy=-0.2769636714013713\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11976\n",
      "fle max=3.0819538381188893\n",
      "normalizing bias features ...\n",
      "number of ebias features =11976\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11976,)\n",
      "len of dfs: 11976\n",
      "the vocabulary size =:11976\n",
      "time passed:12130.036391973495\n",
      "DC0:base entropy=-1.5213552680136186, DC0:basete troenpy=-0.2769636714013713\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11976\n",
      "fle max=10000.657351873171\n",
      "normalizing bias features ...\n",
      "number of ebias features =11976\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11976,)\n",
      "len of dfs: 11976\n",
      "the vocabulary size =:11976\n",
      "time passed:12133.89368891716\n",
      "DC0:base entropy=-1.5213552680136186, DC0:basete troenpy=-0.2769636714013713\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11976\n",
      "fle max=37.7681108041216\n",
      "normalizing bias features ...\n",
      "number of ebias features =11976\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11976,)\n",
      "len of dfs: 11976\n",
      "the vocabulary size =:11976\n",
      "time passed:12137.83074593544\n",
      "DC0:base entropy=-1.5213552680136186, DC0:basete troenpy=-0.2769636714013713\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11976\n",
      "fle max=4.594451784937681\n",
      "normalizing bias features ...\n",
      "number of ebias features =11976\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11976,)\n",
      "len of dfs: 11976\n",
      "the vocabulary size =:11976\n",
      "time passed:12141.736632108688\n",
      "DC0:base entropy=-1.5213552680136186, DC0:basete troenpy=-0.2769636714013713\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =11976\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 13 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11794,)\n",
      "len of dfs: 11794\n",
      "the vocabulary size =:11794\n",
      "time passed:12145.773374080658\n",
      "DC0:base entropy=-1.5307044535913275, DC0:basete troenpy=-0.2685892758310909\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11794\n",
      "fle max=3.0847762202216447\n",
      "normalizing bias features ...\n",
      "number of ebias features =11794\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11794,)\n",
      "len of dfs: 11794\n",
      "the vocabulary size =:11794\n",
      "time passed:12149.66628408432\n",
      "DC0:base entropy=-1.5307044535913275, DC0:basete troenpy=-0.2685892758310909\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11794\n",
      "fle max=10000.653336659027\n",
      "normalizing bias features ...\n",
      "number of ebias features =11794\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11794,)\n",
      "len of dfs: 11794\n",
      "the vocabulary size =:11794\n",
      "time passed:12153.516645908356\n",
      "DC0:base entropy=-1.5307044535913275, DC0:basete troenpy=-0.2685892758310909\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11794\n",
      "fle max=35.77892730583205\n",
      "normalizing bias features ...\n",
      "number of ebias features =11794\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11794,)\n",
      "len of dfs: 11794\n",
      "the vocabulary size =:11794\n",
      "time passed:12157.327156066895\n",
      "DC0:base entropy=-1.5307044535913275, DC0:basete troenpy=-0.2685892758310909\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11794\n",
      "fle max=4.458244017857516\n",
      "normalizing bias features ...\n",
      "number of ebias features =11794\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11794,)\n",
      "len of dfs: 11794\n",
      "the vocabulary size =:11794\n",
      "time passed:12161.140969991684\n",
      "DC0:base entropy=-1.5307044535913275, DC0:basete troenpy=-0.2685892758310909\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =11794\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 14 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11741,)\n",
      "len of dfs: 11741\n",
      "the vocabulary size =:11741\n",
      "time passed:12164.927331924438\n",
      "DC0:base entropy=-1.5304578803706244, DC0:basete troenpy=-0.26867576262830234\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11741\n",
      "fle max=3.091219407524706\n",
      "normalizing bias features ...\n",
      "number of ebias features =11741\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11741,)\n",
      "len of dfs: 11741\n",
      "the vocabulary size =:11741\n",
      "time passed:12168.732622861862\n",
      "DC0:base entropy=-1.5304578803706244, DC0:basete troenpy=-0.26867576262830234\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11741\n",
      "fle max=10000.653441925466\n",
      "normalizing bias features ...\n",
      "number of ebias features =11741\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11741,)\n",
      "len of dfs: 11741\n",
      "the vocabulary size =:11741\n",
      "time passed:12172.462765216827\n",
      "DC0:base entropy=-1.5304578803706244, DC0:basete troenpy=-0.26867576262830234\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11741\n",
      "fle max=38.772431351336465\n",
      "normalizing bias features ...\n",
      "number of ebias features =11741\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11741,)\n",
      "len of dfs: 11741\n",
      "the vocabulary size =:11741\n",
      "time passed:12176.206557035446\n",
      "DC0:base entropy=-1.5304578803706244, DC0:basete troenpy=-0.26867576262830234\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11741\n",
      "fle max=4.612481184481987\n",
      "normalizing bias features ...\n",
      "number of ebias features =11741\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11741,)\n",
      "len of dfs: 11741\n",
      "the vocabulary size =:11741\n",
      "time passed:12179.953017950058\n",
      "DC0:base entropy=-1.5304578803706244, DC0:basete troenpy=-0.26867576262830234\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =11741\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 15 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11837,)\n",
      "len of dfs: 11837\n",
      "the vocabulary size =:11837\n",
      "time passed:12183.817611217499\n",
      "DC0:base entropy=-1.5203834024151737, DC0:basete troenpy=-0.27636832778775344\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11837\n",
      "fle max=3.0809112200167226\n",
      "normalizing bias features ...\n",
      "number of ebias features =11837\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11837,)\n",
      "len of dfs: 11837\n",
      "the vocabulary size =:11837\n",
      "time passed:12187.655431985855\n",
      "DC0:base entropy=-1.5203834024151737, DC0:basete troenpy=-0.27636832778775344\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11837\n",
      "fle max=10000.657772095921\n",
      "normalizing bias features ...\n",
      "number of ebias features =11837\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11837,)\n",
      "len of dfs: 11837\n",
      "the vocabulary size =:11837\n",
      "time passed:12191.581768989563\n",
      "DC0:base entropy=-1.5203834024151737, DC0:basete troenpy=-0.27636832778775344\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11837\n",
      "fle max=34.774673845678976\n",
      "normalizing bias features ...\n",
      "number of ebias features =11837\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11837,)\n",
      "len of dfs: 11837\n",
      "the vocabulary size =:11837\n",
      "time passed:12195.471214056015\n",
      "DC0:base entropy=-1.5203834024151737, DC0:basete troenpy=-0.27636832778775344\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11837\n",
      "fle max=4.5248635698371125\n",
      "normalizing bias features ...\n",
      "number of ebias features =11837\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11837,)\n",
      "len of dfs: 11837\n",
      "the vocabulary size =:11837\n",
      "time passed:12199.25752401352\n",
      "DC0:base entropy=-1.5203834024151737, DC0:basete troenpy=-0.27636832778775344\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =11837\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 16 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11972,)\n",
      "len of dfs: 11972\n",
      "the vocabulary size =:11972\n",
      "time passed:12203.023359060287\n",
      "DC0:base entropy=-1.5237285560638054, DC0:basete troenpy=-0.27308723097431825\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11972\n",
      "fle max=3.089412543038668\n",
      "normalizing bias features ...\n",
      "number of ebias features =11972\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11972,)\n",
      "len of dfs: 11972\n",
      "the vocabulary size =:11972\n",
      "time passed:12206.859507083893\n",
      "DC0:base entropy=-1.5237285560638054, DC0:basete troenpy=-0.27308723097431825\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11972\n",
      "fle max=10000.656327945562\n",
      "normalizing bias features ...\n",
      "number of ebias features =11972\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11972,)\n",
      "len of dfs: 11972\n",
      "the vocabulary size =:11972\n",
      "time passed:12210.710083246231\n",
      "DC0:base entropy=-1.5237285560638054, DC0:basete troenpy=-0.27308723097431825\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11972\n",
      "fle max=28.78867433382145\n",
      "normalizing bias features ...\n",
      "number of ebias features =11972\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11972,)\n",
      "len of dfs: 11972\n",
      "the vocabulary size =:11972\n",
      "time passed:12214.588715076447\n",
      "DC0:base entropy=-1.5237285560638054, DC0:basete troenpy=-0.27308723097431825\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =11972\n",
      "fle max=4.477779850365284\n",
      "normalizing bias features ...\n",
      "number of ebias features =11972\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11972,)\n",
      "len of dfs: 11972\n",
      "the vocabulary size =:11972\n",
      "time passed:12218.393364191055\n",
      "DC0:base entropy=-1.5237285560638054, DC0:basete troenpy=-0.27308723097431825\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =11972\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 17 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11819,)\n",
      "len of dfs: 11819\n",
      "the vocabulary size =:11819\n",
      "time passed:12222.264873027802\n",
      "DC0:base entropy=-1.523064969113045, DC0:basete troenpy=-0.2747056011600164\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11819\n",
      "fle max=3.0866474681831777\n",
      "normalizing bias features ...\n",
      "number of ebias features =11819\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11819,)\n",
      "len of dfs: 11819\n",
      "the vocabulary size =:11819\n",
      "time passed:12226.047960281372\n",
      "DC0:base entropy=-1.523064969113045, DC0:basete troenpy=-0.2747056011600164\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11819\n",
      "fle max=10000.656613921055\n",
      "normalizing bias features ...\n",
      "number of ebias features =11819\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11819,)\n",
      "len of dfs: 11819\n",
      "the vocabulary size =:11819\n",
      "time passed:12229.876718997955\n",
      "DC0:base entropy=-1.523064969113045, DC0:basete troenpy=-0.2747056011600164\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11819\n",
      "fle max=38.76761983708041\n",
      "normalizing bias features ...\n",
      "number of ebias features =11819\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11819,)\n",
      "len of dfs: 11819\n",
      "the vocabulary size =:11819\n",
      "time passed:12233.709549188614\n",
      "DC0:base entropy=-1.523064969113045, DC0:basete troenpy=-0.2747056011600164\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11819\n",
      "fle max=4.578770694364186\n",
      "normalizing bias features ...\n",
      "number of ebias features =11819\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11819,)\n",
      "len of dfs: 11819\n",
      "the vocabulary size =:11819\n",
      "time passed:12237.48201417923\n",
      "DC0:base entropy=-1.523064969113045, DC0:basete troenpy=-0.2747056011600164\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =11819\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 18 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11900,)\n",
      "len of dfs: 11900\n",
      "the vocabulary size =:11900\n",
      "time passed:12241.323291063309\n",
      "DC0:base entropy=-1.5157193083732037, DC0:basete troenpy=-0.2788584885080255\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11900\n",
      "fle max=3.0859544259144798\n",
      "normalizing bias features ...\n",
      "number of ebias features =11900\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11900,)\n",
      "len of dfs: 11900\n",
      "the vocabulary size =:11900\n",
      "time passed:12245.164685249329\n",
      "DC0:base entropy=-1.5157193083732037, DC0:basete troenpy=-0.2788584885080255\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11900\n",
      "fle max=10000.659796292166\n",
      "normalizing bias features ...\n",
      "number of ebias features =11900\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11900,)\n",
      "len of dfs: 11900\n",
      "the vocabulary size =:11900\n",
      "time passed:12249.0270049572\n",
      "DC0:base entropy=-1.5157193083732037, DC0:basete troenpy=-0.2788584885080255\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11900\n",
      "fle max=24.790694234219778\n",
      "normalizing bias features ...\n",
      "number of ebias features =11900\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11900,)\n",
      "len of dfs: 11900\n",
      "the vocabulary size =:11900\n",
      "time passed:12252.965035200119\n",
      "DC0:base entropy=-1.5157193083732037, DC0:basete troenpy=-0.2788584885080255\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11900\n",
      "fle max=4.555524607524081\n",
      "normalizing bias features ...\n",
      "number of ebias features =11900\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11900,)\n",
      "len of dfs: 11900\n",
      "the vocabulary size =:11900\n",
      "time passed:12256.980401992798\n",
      "DC0:base entropy=-1.5157193083732037, DC0:basete troenpy=-0.2788584885080255\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =11900\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 19 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11781,)\n",
      "len of dfs: 11781\n",
      "the vocabulary size =:11781\n",
      "time passed:12260.858679056168\n",
      "DC0:base entropy=-1.5154777533793777, DC0:basete troenpy=-0.280038202135121\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11781\n",
      "fle max=3.073426714072572\n",
      "normalizing bias features ...\n",
      "number of ebias features =11781\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11781,)\n",
      "len of dfs: 11781\n",
      "the vocabulary size =:11781\n",
      "time passed:12264.716495037079\n",
      "DC0:base entropy=-1.5154777533793777, DC0:basete troenpy=-0.280038202135121\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11781\n",
      "fle max=10000.65990146534\n",
      "normalizing bias features ...\n",
      "number of ebias features =11781\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11781,)\n",
      "len of dfs: 11781\n",
      "the vocabulary size =:11781\n",
      "time passed:12268.578073978424\n",
      "DC0:base entropy=-1.5154777533793777, DC0:basete troenpy=-0.280038202135121\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =11781\n",
      "fle max=39.76097898756867\n",
      "normalizing bias features ...\n",
      "number of ebias features =11781\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11781,)\n",
      "len of dfs: 11781\n",
      "the vocabulary size =:11781\n",
      "time passed:12272.337261915207\n",
      "DC0:base entropy=-1.5154777533793777, DC0:basete troenpy=-0.280038202135121\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11781\n",
      "fle max=4.556704321151177\n",
      "normalizing bias features ...\n",
      "number of ebias features =11781\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11781,)\n",
      "len of dfs: 11781\n",
      "the vocabulary size =:11781\n",
      "time passed:12276.121015071869\n",
      "DC0:base entropy=-1.5154777533793777, DC0:basete troenpy=-0.280038202135121\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =11781\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 20 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12039,)\n",
      "len of dfs: 12039\n",
      "the vocabulary size =:12039\n",
      "time passed:12279.876511096954\n",
      "DC0:base entropy=-1.5176918152804337, DC0:basete troenpy=-0.279135049929631\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12039\n",
      "fle max=3.0797319757455375\n",
      "normalizing bias features ...\n",
      "number of ebias features =12039\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12039,)\n",
      "len of dfs: 12039\n",
      "the vocabulary size =:12039\n",
      "time passed:12283.75638794899\n",
      "DC0:base entropy=-1.5176918152804337, DC0:basete troenpy=-0.279135049929631\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12039\n",
      "fle max=10000.658938714569\n",
      "normalizing bias features ...\n",
      "number of ebias features =12039\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12039,)\n",
      "len of dfs: 12039\n",
      "the vocabulary size =:12039\n",
      "time passed:12287.642021894455\n",
      "DC0:base entropy=-1.5176918152804337, DC0:basete troenpy=-0.279135049929631\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12039\n",
      "fle max=34.77252832890247\n",
      "normalizing bias features ...\n",
      "number of ebias features =12039\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12039,)\n",
      "len of dfs: 12039\n",
      "the vocabulary size =:12039\n",
      "time passed:12291.483669042587\n",
      "DC0:base entropy=-1.5176918152804337, DC0:basete troenpy=-0.279135049929631\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12039\n",
      "fle max=4.648582902396653\n",
      "normalizing bias features ...\n",
      "number of ebias features =12039\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12039,)\n",
      "len of dfs: 12039\n",
      "the vocabulary size =:12039\n",
      "time passed:12295.393049955368\n",
      "DC0:base entropy=-1.5176918152804337, DC0:basete troenpy=-0.279135049929631\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =12039\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 21 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11897,)\n",
      "len of dfs: 11897\n",
      "the vocabulary size =:11897\n",
      "time passed:12299.245463132858\n",
      "DC0:base entropy=-1.5140767568854119, DC0:basete troenpy=-0.2789578226927697\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11897\n",
      "fle max=3.0841649749435103\n",
      "normalizing bias features ...\n",
      "number of ebias features =11897\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11897,)\n",
      "len of dfs: 11897\n",
      "the vocabulary size =:11897\n",
      "time passed:12303.03575205803\n",
      "DC0:base entropy=-1.5140767568854119, DC0:basete troenpy=-0.2789578226927697\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11897\n",
      "fle max=10000.660512121769\n",
      "normalizing bias features ...\n",
      "number of ebias features =11897\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11897,)\n",
      "len of dfs: 11897\n",
      "the vocabulary size =:11897\n",
      "time passed:12306.934903144836\n",
      "DC0:base entropy=-1.5140767568854119, DC0:basete troenpy=-0.2789578226927697\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11897\n",
      "fle max=37.765918368966624\n",
      "normalizing bias features ...\n",
      "number of ebias features =11897\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11897,)\n",
      "len of dfs: 11897\n",
      "the vocabulary size =:11897\n",
      "time passed:12310.794404029846\n",
      "DC0:base entropy=-1.5140767568854119, DC0:basete troenpy=-0.2789578226927697\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11897\n",
      "fle max=4.5416376997340855\n",
      "normalizing bias features ...\n",
      "number of ebias features =11897\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11897,)\n",
      "len of dfs: 11897\n",
      "the vocabulary size =:11897\n",
      "time passed:12314.601770162582\n",
      "DC0:base entropy=-1.5140767568854119, DC0:basete troenpy=-0.2789578226927697\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =11897\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 22 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11833,)\n",
      "len of dfs: 11833\n",
      "the vocabulary size =:11833\n",
      "time passed:12318.478189229965\n",
      "DC0:base entropy=-1.527419884886716, DC0:basete troenpy=-0.2723793335914075\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11833\n",
      "fle max=3.081498761036231\n",
      "normalizing bias features ...\n",
      "number of ebias features =11833\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11833,)\n",
      "len of dfs: 11833\n",
      "the vocabulary size =:11833\n",
      "time passed:12322.268879890442\n",
      "DC0:base entropy=-1.527419884886716, DC0:basete troenpy=-0.2723793335914075\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =11833\n",
      "fle max=10000.654741688297\n",
      "normalizing bias features ...\n",
      "number of ebias features =11833\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11833,)\n",
      "len of dfs: 11833\n",
      "the vocabulary size =:11833\n",
      "time passed:12326.1032269001\n",
      "DC0:base entropy=-1.527419884886716, DC0:basete troenpy=-0.2723793335914075\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11833\n",
      "fle max=36.77397240978331\n",
      "normalizing bias features ...\n",
      "number of ebias features =11833\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11833,)\n",
      "len of dfs: 11833\n",
      "the vocabulary size =:11833\n",
      "time passed:12329.893102884293\n",
      "DC0:base entropy=-1.527419884886716, DC0:basete troenpy=-0.2723793335914075\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11833\n",
      "fle max=4.576444426795578\n",
      "normalizing bias features ...\n",
      "number of ebias features =11833\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11833,)\n",
      "len of dfs: 11833\n",
      "the vocabulary size =:11833\n",
      "time passed:12333.651203870773\n",
      "DC0:base entropy=-1.527419884886716, DC0:basete troenpy=-0.2723793335914075\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =11833\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 23 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12068,)\n",
      "len of dfs: 12068\n",
      "the vocabulary size =:12068\n",
      "time passed:12337.578269004822\n",
      "DC0:base entropy=-1.5264254511978397, DC0:basete troenpy=-0.27228978219395467\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12068\n",
      "fle max=3.0930030231826\n",
      "normalizing bias features ...\n",
      "number of ebias features =12068\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12068,)\n",
      "len of dfs: 12068\n",
      "the vocabulary size =:12068\n",
      "time passed:12341.563892126083\n",
      "DC0:base entropy=-1.5264254511978397, DC0:basete troenpy=-0.27228978219395467\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12068\n",
      "fle max=10000.655168266516\n",
      "normalizing bias features ...\n",
      "number of ebias features =12068\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12068,)\n",
      "len of dfs: 12068\n",
      "the vocabulary size =:12068\n",
      "time passed:12345.429848909378\n",
      "DC0:base entropy=-1.5264254511978397, DC0:basete troenpy=-0.27228978219395467\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12068\n",
      "fle max=48.74560889254744\n",
      "normalizing bias features ...\n",
      "number of ebias features =12068\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12068,)\n",
      "len of dfs: 12068\n",
      "the vocabulary size =:12068\n",
      "time passed:12349.33846616745\n",
      "DC0:base entropy=-1.5264254511978397, DC0:basete troenpy=-0.27228978219395467\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12068\n",
      "fle max=4.520785024243314\n",
      "normalizing bias features ...\n",
      "number of ebias features =12068\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12068,)\n",
      "len of dfs: 12068\n",
      "the vocabulary size =:12068\n",
      "time passed:12353.181585073471\n",
      "DC0:base entropy=-1.5264254511978397, DC0:basete troenpy=-0.27228978219395467\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =12068\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 24 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11795,)\n",
      "len of dfs: 11795\n",
      "the vocabulary size =:11795\n",
      "time passed:12357.046115875244\n",
      "DC0:base entropy=-1.5279404211495138, DC0:basete troenpy=-0.27064812973709057\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11795\n",
      "fle max=3.0966573374772883\n",
      "normalizing bias features ...\n",
      "number of ebias features =11795\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11795,)\n",
      "len of dfs: 11795\n",
      "the vocabulary size =:11795\n",
      "time passed:12360.910959005356\n",
      "DC0:base entropy=-1.5279404211495138, DC0:basete troenpy=-0.27064812973709057\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11795\n",
      "fle max=10000.65451861736\n",
      "normalizing bias features ...\n",
      "number of ebias features =11795\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11795,)\n",
      "len of dfs: 11795\n",
      "the vocabulary size =:11795\n",
      "time passed:12364.72962808609\n",
      "DC0:base entropy=-1.5279404211495138, DC0:basete troenpy=-0.27064812973709057\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11795\n",
      "fle max=36.77516627484907\n",
      "normalizing bias features ...\n",
      "number of ebias features =11795\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11795,)\n",
      "len of dfs: 11795\n",
      "the vocabulary size =:11795\n",
      "time passed:12368.493165254593\n",
      "DC0:base entropy=-1.5279404211495138, DC0:basete troenpy=-0.27064812973709057\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11795\n",
      "fle max=4.561107570885482\n",
      "normalizing bias features ...\n",
      "number of ebias features =11795\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11795,)\n",
      "len of dfs: 11795\n",
      "the vocabulary size =:11795\n",
      "time passed:12372.344684123993\n",
      "DC0:base entropy=-1.5279404211495138, DC0:basete troenpy=-0.27064812973709057\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =11795\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 25 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11795,)\n",
      "len of dfs: 11795\n",
      "the vocabulary size =:11795\n",
      "time passed:12376.177676200867\n",
      "DC0:base entropy=-1.5150707705921687, DC0:basete troenpy=-0.28124894731708167\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =11795\n",
      "fle max=3.069350649050665\n",
      "normalizing bias features ...\n",
      "number of ebias features =11795\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11795,)\n",
      "len of dfs: 11795\n",
      "the vocabulary size =:11795\n",
      "time passed:12380.003924131393\n",
      "DC0:base entropy=-1.5150707705921687, DC0:basete troenpy=-0.28124894731708167\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11795\n",
      "fle max=10000.660078741723\n",
      "normalizing bias features ...\n",
      "number of ebias features =11795\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11795,)\n",
      "len of dfs: 11795\n",
      "the vocabulary size =:11795\n",
      "time passed:12383.737617969513\n",
      "DC0:base entropy=-1.5150707705921687, DC0:basete troenpy=-0.28124894731708167\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11795\n",
      "fle max=35.76876007469775\n",
      "normalizing bias features ...\n",
      "number of ebias features =11795\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11795,)\n",
      "len of dfs: 11795\n",
      "the vocabulary size =:11795\n",
      "time passed:12387.6119120121\n",
      "DC0:base entropy=-1.5150707705921687, DC0:basete troenpy=-0.28124894731708167\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11795\n",
      "fle max=4.571708388465472\n",
      "normalizing bias features ...\n",
      "number of ebias features =11795\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11795,)\n",
      "len of dfs: 11795\n",
      "the vocabulary size =:11795\n",
      "time passed:12391.394904851913\n",
      "DC0:base entropy=-1.5150707705921687, DC0:basete troenpy=-0.28124894731708167\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =11795\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 26 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11907,)\n",
      "len of dfs: 11907\n",
      "the vocabulary size =:11907\n",
      "time passed:12395.185749053955\n",
      "DC0:base entropy=-1.5240120584056933, DC0:basete troenpy=-0.2743762229514923\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11907\n",
      "fle max=3.0853176777149214\n",
      "normalizing bias features ...\n",
      "number of ebias features =11907\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11907,)\n",
      "len of dfs: 11907\n",
      "the vocabulary size =:11907\n",
      "time passed:12399.00006198883\n",
      "DC0:base entropy=-1.5240120584056933, DC0:basete troenpy=-0.2743762229514923\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11907\n",
      "fle max=10000.656205845005\n",
      "normalizing bias features ...\n",
      "number of ebias features =11907\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11907,)\n",
      "len of dfs: 11907\n",
      "the vocabulary size =:11907\n",
      "time passed:12402.829336166382\n",
      "DC0:base entropy=-1.5240120584056933, DC0:basete troenpy=-0.2743762229514923\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11907\n",
      "fle max=29.786059849686644\n",
      "normalizing bias features ...\n",
      "number of ebias features =11907\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11907,)\n",
      "len of dfs: 11907\n",
      "the vocabulary size =:11907\n",
      "time passed:12406.725574970245\n",
      "DC0:base entropy=-1.5240120584056933, DC0:basete troenpy=-0.2743762229514923\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11907\n",
      "fle max=4.618181644805176\n",
      "normalizing bias features ...\n",
      "number of ebias features =11907\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11907,)\n",
      "len of dfs: 11907\n",
      "the vocabulary size =:11907\n",
      "time passed:12410.605731248856\n",
      "DC0:base entropy=-1.5240120584056933, DC0:basete troenpy=-0.2743762229514923\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =11907\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 27 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11880,)\n",
      "len of dfs: 11880\n",
      "the vocabulary size =:11880\n",
      "time passed:12414.455659151077\n",
      "DC0:base entropy=-1.5151440590913152, DC0:basete troenpy=-0.28020159324852795\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11880\n",
      "fle max=3.077650695408145\n",
      "normalizing bias features ...\n",
      "number of ebias features =11880\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11880,)\n",
      "len of dfs: 11880\n",
      "the vocabulary size =:11880\n",
      "time passed:12418.307744026184\n",
      "DC0:base entropy=-1.5151440590913152, DC0:basete troenpy=-0.28020159324852795\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11880\n",
      "fle max=10000.66004681118\n",
      "normalizing bias features ...\n",
      "number of ebias features =11880\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11880,)\n",
      "len of dfs: 11880\n",
      "the vocabulary size =:11880\n",
      "time passed:12422.149949073792\n",
      "DC0:base entropy=-1.5151440590913152, DC0:basete troenpy=-0.28020159324852795\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11880\n",
      "fle max=28.782962946974703\n",
      "normalizing bias features ...\n",
      "number of ebias features =11880\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11880,)\n",
      "len of dfs: 11880\n",
      "the vocabulary size =:11880\n",
      "time passed:12425.948762893677\n",
      "DC0:base entropy=-1.5151440590913152, DC0:basete troenpy=-0.28020159324852795\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11880\n",
      "fle max=4.610934933534859\n",
      "normalizing bias features ...\n",
      "number of ebias features =11880\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11880,)\n",
      "len of dfs: 11880\n",
      "the vocabulary size =:11880\n",
      "time passed:12429.738635063171\n",
      "DC0:base entropy=-1.5151440590913152, DC0:basete troenpy=-0.28020159324852795\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =11880\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 28 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11770,)\n",
      "len of dfs: 11770\n",
      "the vocabulary size =:11770\n",
      "time passed:12433.62205195427\n",
      "DC0:base entropy=-1.5262244478774845, DC0:basete troenpy=-0.27183430452898133\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11770\n",
      "fle max=3.0919542819960277\n",
      "normalizing bias features ...\n",
      "number of ebias features =11770\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11770,)\n",
      "len of dfs: 11770\n",
      "the vocabulary size =:11770\n",
      "time passed:12437.463912010193\n",
      "DC0:base entropy=-1.5262244478774845, DC0:basete troenpy=-0.27183430452898133\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11770\n",
      "fle max=10000.655254557641\n",
      "normalizing bias features ...\n",
      "number of ebias features =11770\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11770,)\n",
      "len of dfs: 11770\n",
      "the vocabulary size =:11770\n",
      "time passed:12441.233912229538\n",
      "DC0:base entropy=-1.5262244478774845, DC0:basete troenpy=-0.27183430452898133\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11770\n",
      "fle max=34.77834055292806\n",
      "normalizing bias features ...\n",
      "number of ebias features =11770\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11770,)\n",
      "len of dfs: 11770\n",
      "the vocabulary size =:11770\n",
      "time passed:12445.068640232086\n",
      "DC0:base entropy=-1.5262244478774845, DC0:basete troenpy=-0.27183430452898133\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11770\n",
      "fle max=4.562293745677373\n",
      "normalizing bias features ...\n",
      "number of ebias features =11770\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11770,)\n",
      "len of dfs: 11770\n",
      "the vocabulary size =:11770\n",
      "time passed:12448.838494062424\n",
      "DC0:base entropy=-1.5262244478774845, DC0:basete troenpy=-0.27183430452898133\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =11770\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 29 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11734,)\n",
      "len of dfs: 11734\n",
      "the vocabulary size =:11734\n",
      "time passed:12452.615320205688\n",
      "DC0:base entropy=-1.5298842955857859, DC0:basete troenpy=-0.26934557236398515\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11734\n",
      "fle max=3.0954074457713476\n",
      "normalizing bias features ...\n",
      "number of ebias features =11734\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11734,)\n",
      "len of dfs: 11734\n",
      "the vocabulary size =:11734\n",
      "time passed:12456.429149150848\n",
      "DC0:base entropy=-1.5298842955857859, DC0:basete troenpy=-0.26934557236398515\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11734\n",
      "fle max=10000.653686930167\n",
      "normalizing bias features ...\n",
      "number of ebias features =11734\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11734,)\n",
      "len of dfs: 11734\n",
      "the vocabulary size =:11734\n",
      "time passed:12460.169814109802\n",
      "DC0:base entropy=-1.5298842955857859, DC0:basete troenpy=-0.26934557236398515\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11734\n",
      "fle max=26.795316088500797\n",
      "normalizing bias features ...\n",
      "number of ebias features =11734\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11734,)\n",
      "len of dfs: 11734\n",
      "the vocabulary size =:11734\n",
      "time passed:12463.921029090881\n",
      "DC0:base entropy=-1.5298842955857859, DC0:basete troenpy=-0.26934557236398515\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11734\n",
      "fle max=4.474038191754951\n",
      "normalizing bias features ...\n",
      "number of ebias features =11734\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11734,)\n",
      "len of dfs: 11734\n",
      "the vocabulary size =:11734\n",
      "time passed:12467.719803094864\n",
      "DC0:base entropy=-1.5298842955857859, DC0:basete troenpy=-0.26934557236398515\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =11734\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 30 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11872,)\n",
      "len of dfs: 11872\n",
      "the vocabulary size =:11872\n",
      "time passed:12471.459687948227\n",
      "DC0:base entropy=-1.5188316612401038, DC0:basete troenpy=-0.2765473573675986\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11872\n",
      "fle max=3.0790382785216943\n",
      "normalizing bias features ...\n",
      "number of ebias features =11872\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11872,)\n",
      "len of dfs: 11872\n",
      "the vocabulary size =:11872\n",
      "time passed:12475.367626190186\n",
      "DC0:base entropy=-1.5188316612401038, DC0:basete troenpy=-0.2765473573675986\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11872\n",
      "fle max=10000.658444164641\n",
      "normalizing bias features ...\n",
      "number of ebias features =11872\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11872,)\n",
      "len of dfs: 11872\n",
      "the vocabulary size =:11872\n",
      "time passed:12479.180280208588\n",
      "DC0:base entropy=-1.5188316612401038, DC0:basete troenpy=-0.2765473573675986\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11872\n",
      "fle max=37.76808239861186\n",
      "normalizing bias features ...\n",
      "number of ebias features =11872\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11872,)\n",
      "len of dfs: 11872\n",
      "the vocabulary size =:11872\n",
      "time passed:12483.087511062622\n",
      "DC0:base entropy=-1.5188316612401038, DC0:basete troenpy=-0.2765473573675986\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =11872\n",
      "fle max=4.580612450571769\n",
      "normalizing bias features ...\n",
      "number of ebias features =11872\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11872,)\n",
      "len of dfs: 11872\n",
      "the vocabulary size =:11872\n",
      "time passed:12486.889939069748\n",
      "DC0:base entropy=-1.5188316612401038, DC0:basete troenpy=-0.2765473573675986\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =11872\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 31 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12083,)\n",
      "len of dfs: 12083\n",
      "the vocabulary size =:12083\n",
      "time passed:12490.81590795517\n",
      "DC0:base entropy=-1.5214635993390029, DC0:basete troenpy=-0.2753239913465744\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12083\n",
      "fle max=3.090308070647504\n",
      "normalizing bias features ...\n",
      "number of ebias features =12083\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12083,)\n",
      "len of dfs: 12083\n",
      "the vocabulary size =:12083\n",
      "time passed:12494.712080955505\n",
      "DC0:base entropy=-1.5214635993390029, DC0:basete troenpy=-0.2753239913465744\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12083\n",
      "fle max=10000.657305065295\n",
      "normalizing bias features ...\n",
      "number of ebias features =12083\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12083,)\n",
      "len of dfs: 12083\n",
      "the vocabulary size =:12083\n",
      "time passed:12498.646332025528\n",
      "DC0:base entropy=-1.5214635993390029, DC0:basete troenpy=-0.2753239913465744\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12083\n",
      "fle max=34.77548936578484\n",
      "normalizing bias features ...\n",
      "number of ebias features =12083\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12083,)\n",
      "len of dfs: 12083\n",
      "the vocabulary size =:12083\n",
      "time passed:12502.594326019287\n",
      "DC0:base entropy=-1.5214635993390029, DC0:basete troenpy=-0.2753239913465744\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12083\n",
      "fle max=4.619129413200259\n",
      "normalizing bias features ...\n",
      "number of ebias features =12083\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12083,)\n",
      "len of dfs: 12083\n",
      "the vocabulary size =:12083\n",
      "time passed:12506.62004518509\n",
      "DC0:base entropy=-1.5214635993390029, DC0:basete troenpy=-0.2753239913465744\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =12083\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 32 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11942,)\n",
      "len of dfs: 11942\n",
      "the vocabulary size =:11942\n",
      "time passed:12510.57202219963\n",
      "DC0:base entropy=-1.5287165182460933, DC0:basete troenpy=-0.2698236360613818\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11942\n",
      "fle max=3.0956792019897215\n",
      "normalizing bias features ...\n",
      "number of ebias features =11942\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11942,)\n",
      "len of dfs: 11942\n",
      "the vocabulary size =:11942\n",
      "time passed:12514.423241138458\n",
      "DC0:base entropy=-1.5287165182460933, DC0:basete troenpy=-0.2698236360613818\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11942\n",
      "fle max=10000.654186310343\n",
      "normalizing bias features ...\n",
      "number of ebias features =11942\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11942,)\n",
      "len of dfs: 11942\n",
      "the vocabulary size =:11942\n",
      "time passed:12518.354116916656\n",
      "DC0:base entropy=-1.5287165182460933, DC0:basete troenpy=-0.2698236360613818\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11942\n",
      "fle max=33.781959453111035\n",
      "normalizing bias features ...\n",
      "number of ebias features =11942\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11942,)\n",
      "len of dfs: 11942\n",
      "the vocabulary size =:11942\n",
      "time passed:12522.247967004776\n",
      "DC0:base entropy=-1.5287165182460933, DC0:basete troenpy=-0.2698236360613818\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11942\n",
      "fle max=4.560283077209773\n",
      "normalizing bias features ...\n",
      "number of ebias features =11942\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11942,)\n",
      "len of dfs: 11942\n",
      "the vocabulary size =:11942\n",
      "time passed:12526.176488161087\n",
      "DC0:base entropy=-1.5287165182460933, DC0:basete troenpy=-0.2698236360613818\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =11942\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 33 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11886,)\n",
      "len of dfs: 11886\n",
      "the vocabulary size =:11886\n",
      "time passed:12530.541828870773\n",
      "DC0:base entropy=-1.5260238285754297, DC0:basete troenpy=-0.27156167210652493\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11886\n",
      "fle max=3.0891002712258855\n",
      "normalizing bias features ...\n",
      "number of ebias features =11886\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11886,)\n",
      "len of dfs: 11886\n",
      "the vocabulary size =:11886\n",
      "time passed:12534.386429071426\n",
      "DC0:base entropy=-1.5260238285754297, DC0:basete troenpy=-0.27156167210652493\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11886\n",
      "fle max=10000.655340706577\n",
      "normalizing bias features ...\n",
      "number of ebias features =11886\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11886,)\n",
      "len of dfs: 11886\n",
      "the vocabulary size =:11886\n",
      "time passed:12538.223432064056\n",
      "DC0:base entropy=-1.5260238285754297, DC0:basete troenpy=-0.27156167210652493\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =11886\n",
      "fle max=35.776441107876515\n",
      "normalizing bias features ...\n",
      "number of ebias features =11886\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11886,)\n",
      "len of dfs: 11886\n",
      "the vocabulary size =:11886\n",
      "time passed:12542.090224981308\n",
      "DC0:base entropy=-1.5260238285754297, DC0:basete troenpy=-0.27156167210652493\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11886\n",
      "fle max=4.562021113254916\n",
      "normalizing bias features ...\n",
      "number of ebias features =11886\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11886,)\n",
      "len of dfs: 11886\n",
      "the vocabulary size =:11886\n",
      "time passed:12545.951649188995\n",
      "DC0:base entropy=-1.5260238285754297, DC0:basete troenpy=-0.27156167210652493\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =11886\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 34 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11817,)\n",
      "len of dfs: 11817\n",
      "the vocabulary size =:11817\n",
      "time passed:12549.792829990387\n",
      "DC0:base entropy=-1.5302133727568121, DC0:basete troenpy=-0.26866341318273096\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11817\n",
      "fle max=3.0986316865582593\n",
      "normalizing bias features ...\n",
      "number of ebias features =11817\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11817,)\n",
      "len of dfs: 11817\n",
      "the vocabulary size =:11817\n",
      "time passed:12553.63802909851\n",
      "DC0:base entropy=-1.5302133727568121, DC0:basete troenpy=-0.26866341318273096\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11817\n",
      "fle max=10000.653546343561\n",
      "normalizing bias features ...\n",
      "number of ebias features =11817\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11817,)\n",
      "len of dfs: 11817\n",
      "the vocabulary size =:11817\n",
      "time passed:12557.448793888092\n",
      "DC0:base entropy=-1.5302133727568121, DC0:basete troenpy=-0.26866341318273096\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11817\n",
      "fle max=33.78289039069486\n",
      "normalizing bias features ...\n",
      "number of ebias features =11817\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11817,)\n",
      "len of dfs: 11817\n",
      "the vocabulary size =:11817\n",
      "time passed:12561.270035028458\n",
      "DC0:base entropy=-1.5302133727568121, DC0:basete troenpy=-0.26866341318273096\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11817\n",
      "fle max=4.545329532198786\n",
      "normalizing bias features ...\n",
      "number of ebias features =11817\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11817,)\n",
      "len of dfs: 11817\n",
      "the vocabulary size =:11817\n",
      "time passed:12565.059689998627\n",
      "DC0:base entropy=-1.5302133727568121, DC0:basete troenpy=-0.26866341318273096\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =11817\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 35 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11820,)\n",
      "len of dfs: 11820\n",
      "the vocabulary size =:11820\n",
      "time passed:12568.876909971237\n",
      "DC0:base entropy=-1.5307044535913275, DC0:basete troenpy=-0.2685892758310909\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11820\n",
      "fle max=3.093120328863727\n",
      "normalizing bias features ...\n",
      "number of ebias features =11820\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11820,)\n",
      "len of dfs: 11820\n",
      "the vocabulary size =:11820\n",
      "time passed:12572.649204015732\n",
      "DC0:base entropy=-1.5307044535913275, DC0:basete troenpy=-0.2685892758310909\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11820\n",
      "fle max=10000.653336659027\n",
      "normalizing bias features ...\n",
      "number of ebias features =11820\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11820,)\n",
      "len of dfs: 11820\n",
      "the vocabulary size =:11820\n",
      "time passed:12576.460008144379\n",
      "DC0:base entropy=-1.5307044535913275, DC0:basete troenpy=-0.2685892758310909\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11820\n",
      "fle max=29.790627910796157\n",
      "normalizing bias features ...\n",
      "number of ebias features =11820\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11820,)\n",
      "len of dfs: 11820\n",
      "the vocabulary size =:11820\n",
      "time passed:12580.250336170197\n",
      "DC0:base entropy=-1.5307044535913275, DC0:basete troenpy=-0.2685892758310909\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11820\n",
      "fle max=4.545255394847146\n",
      "normalizing bias features ...\n",
      "number of ebias features =11820\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11820,)\n",
      "len of dfs: 11820\n",
      "the vocabulary size =:11820\n",
      "time passed:12584.114120960236\n",
      "DC0:base entropy=-1.5307044535913275, DC0:basete troenpy=-0.2685892758310909\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =11820\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 36 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11772,)\n",
      "len of dfs: 11772\n",
      "the vocabulary size =:11772\n",
      "time passed:12587.998145103455\n",
      "DC0:base entropy=-1.5306218843328774, DC0:basete troenpy=-0.2665284809370888\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11772\n",
      "fle max=3.101079198667769\n",
      "normalizing bias features ...\n",
      "number of ebias features =11772\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11772,)\n",
      "len of dfs: 11772\n",
      "the vocabulary size =:11772\n",
      "time passed:12591.758275270462\n",
      "DC0:base entropy=-1.5306218843328774, DC0:basete troenpy=-0.2665284809370888\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =11772\n",
      "fle max=10000.653371905515\n",
      "normalizing bias features ...\n",
      "number of ebias features =11772\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11772,)\n",
      "len of dfs: 11772\n",
      "the vocabulary size =:11772\n",
      "time passed:12595.627189159393\n",
      "DC0:base entropy=-1.5306218843328774, DC0:basete troenpy=-0.2665284809370888\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11772\n",
      "fle max=33.784332160565164\n",
      "normalizing bias features ...\n",
      "number of ebias features =11772\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11772,)\n",
      "len of dfs: 11772\n",
      "the vocabulary size =:11772\n",
      "time passed:12599.450979232788\n",
      "DC0:base entropy=-1.5306218843328774, DC0:basete troenpy=-0.2665284809370888\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11772\n",
      "fle max=4.515023722986448\n",
      "normalizing bias features ...\n",
      "number of ebias features =11772\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11772,)\n",
      "len of dfs: 11772\n",
      "the vocabulary size =:11772\n",
      "time passed:12603.318370103836\n",
      "DC0:base entropy=-1.5306218843328774, DC0:basete troenpy=-0.2665284809370888\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =11772\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 37 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12046,)\n",
      "len of dfs: 12046\n",
      "the vocabulary size =:12046\n",
      "time passed:12607.117305994034\n",
      "DC0:base entropy=-1.5245460028760702, DC0:basete troenpy=-0.27310823875935625\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12046\n",
      "fle max=3.0847416963960086\n",
      "normalizing bias features ...\n",
      "number of ebias features =12046\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12046,)\n",
      "len of dfs: 12046\n",
      "the vocabulary size =:12046\n",
      "time passed:12611.043640851974\n",
      "DC0:base entropy=-1.5245460028760702, DC0:basete troenpy=-0.27310823875935625\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12046\n",
      "fle max=10000.655976005784\n",
      "normalizing bias features ...\n",
      "number of ebias features =12046\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12046,)\n",
      "len of dfs: 12046\n",
      "the vocabulary size =:12046\n",
      "time passed:12615.009824037552\n",
      "DC0:base entropy=-1.5245460028760702, DC0:basete troenpy=-0.27310823875935625\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12046\n",
      "fle max=52.73415276955094\n",
      "normalizing bias features ...\n",
      "number of ebias features =12046\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12046,)\n",
      "len of dfs: 12046\n",
      "the vocabulary size =:12046\n",
      "time passed:12618.98932313919\n",
      "DC0:base entropy=-1.5245460028760702, DC0:basete troenpy=-0.27310823875935625\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12046\n",
      "fle max=4.590596352295666\n",
      "normalizing bias features ...\n",
      "number of ebias features =12046\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12046,)\n",
      "len of dfs: 12046\n",
      "the vocabulary size =:12046\n",
      "time passed:12622.883787155151\n",
      "DC0:base entropy=-1.5245460028760702, DC0:basete troenpy=-0.27310823875935625\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =12046\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 38 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11806,)\n",
      "len of dfs: 11806\n",
      "the vocabulary size =:11806\n",
      "time passed:12626.870089054108\n",
      "DC0:base entropy=-1.5235461406491406, DC0:basete troenpy=-0.27444318100654724\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11806\n",
      "fle max=3.0918921152684358\n",
      "normalizing bias features ...\n",
      "number of ebias features =11806\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11806,)\n",
      "len of dfs: 11806\n",
      "the vocabulary size =:11806\n",
      "time passed:12630.66421198845\n",
      "DC0:base entropy=-1.5235461406491406, DC0:basete troenpy=-0.27444318100654724\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11806\n",
      "fle max=10000.656406533397\n",
      "normalizing bias features ...\n",
      "number of ebias features =11806\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11806,)\n",
      "len of dfs: 11806\n",
      "the vocabulary size =:11806\n",
      "time passed:12634.470625162125\n",
      "DC0:base entropy=-1.5235461406491406, DC0:basete troenpy=-0.27444318100654724\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11806\n",
      "fle max=34.776314922835155\n",
      "normalizing bias features ...\n",
      "number of ebias features =11806\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11806,)\n",
      "len of dfs: 11806\n",
      "the vocabulary size =:11806\n",
      "time passed:12638.266228199005\n",
      "DC0:base entropy=-1.5235461406491406, DC0:basete troenpy=-0.27444318100654724\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11806\n",
      "fle max=4.591931294542857\n",
      "normalizing bias features ...\n",
      "number of ebias features =11806\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11806,)\n",
      "len of dfs: 11806\n",
      "the vocabulary size =:11806\n",
      "time passed:12642.070662021637\n",
      "DC0:base entropy=-1.5235461406491406, DC0:basete troenpy=-0.27444318100654724\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =11806\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 39 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11835,)\n",
      "len of dfs: 11835\n",
      "the vocabulary size =:11835\n",
      "time passed:12645.916427135468\n",
      "DC0:base entropy=-1.5112431064513256, DC0:basete troenpy=-0.2824759917256702\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =11835\n",
      "fle max=3.070264097198696\n",
      "normalizing bias features ...\n",
      "number of ebias features =11835\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11835,)\n",
      "len of dfs: 11835\n",
      "the vocabulary size =:11835\n",
      "time passed:12649.704773902893\n",
      "DC0:base entropy=-1.5112431064513256, DC0:basete troenpy=-0.2824759917256702\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11835\n",
      "fle max=10000.661750694378\n",
      "normalizing bias features ...\n",
      "number of ebias features =11835\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11835,)\n",
      "len of dfs: 11835\n",
      "the vocabulary size =:11835\n",
      "time passed:12653.526376008987\n",
      "DC0:base entropy=-1.5112431064513256, DC0:basete troenpy=-0.2824759917256702\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11835\n",
      "fle max=33.771545698359404\n",
      "normalizing bias features ...\n",
      "number of ebias features =11835\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11835,)\n",
      "len of dfs: 11835\n",
      "the vocabulary size =:11835\n",
      "time passed:12657.379884243011\n",
      "DC0:base entropy=-1.5112431064513256, DC0:basete troenpy=-0.2824759917256702\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11835\n",
      "fle max=4.613209332012001\n",
      "normalizing bias features ...\n",
      "number of ebias features =11835\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11835,)\n",
      "len of dfs: 11835\n",
      "the vocabulary size =:11835\n",
      "time passed:12661.293800115585\n",
      "DC0:base entropy=-1.5112431064513256, DC0:basete troenpy=-0.2824759917256702\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =11835\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 40 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11838,)\n",
      "len of dfs: 11838\n",
      "the vocabulary size =:11838\n",
      "time passed:12665.138864040375\n",
      "DC0:base entropy=-1.523632418794896, DC0:basete troenpy=-0.27350100787012666\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11838\n",
      "fle max=3.0905579320358108\n",
      "normalizing bias features ...\n",
      "number of ebias features =11838\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11838,)\n",
      "len of dfs: 11838\n",
      "the vocabulary size =:11838\n",
      "time passed:12668.913562059402\n",
      "DC0:base entropy=-1.523632418794896, DC0:basete troenpy=-0.27350100787012666\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11838\n",
      "fle max=10000.656369360877\n",
      "normalizing bias features ...\n",
      "number of ebias features =11838\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11838,)\n",
      "len of dfs: 11838\n",
      "the vocabulary size =:11838\n",
      "time passed:12672.725394964218\n",
      "DC0:base entropy=-1.523632418794896, DC0:basete troenpy=-0.27350100787012666\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11838\n",
      "fle max=37.770655710582645\n",
      "normalizing bias features ...\n",
      "number of ebias features =11838\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11838,)\n",
      "len of dfs: 11838\n",
      "the vocabulary size =:11838\n",
      "time passed:12676.55044412613\n",
      "DC0:base entropy=-1.523632418794896, DC0:basete troenpy=-0.27350100787012666\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11838\n",
      "fle max=4.563960449018517\n",
      "normalizing bias features ...\n",
      "number of ebias features =11838\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11838,)\n",
      "len of dfs: 11838\n",
      "the vocabulary size =:11838\n",
      "time passed:12680.439277887344\n",
      "DC0:base entropy=-1.523632418794896, DC0:basete troenpy=-0.27350100787012666\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =11838\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 41 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11838,)\n",
      "len of dfs: 11838\n",
      "the vocabulary size =:11838\n",
      "time passed:12684.414098978043\n",
      "DC0:base entropy=-1.5254708020812768, DC0:basete troenpy=-0.27271682706759126\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11838\n",
      "fle max=3.088152099312649\n",
      "normalizing bias features ...\n",
      "number of ebias features =11838\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11838,)\n",
      "len of dfs: 11838\n",
      "the vocabulary size =:11838\n",
      "time passed:12688.365800142288\n",
      "DC0:base entropy=-1.5254708020812768, DC0:basete troenpy=-0.27271682706759126\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11838\n",
      "fle max=10000.655578301772\n",
      "normalizing bias features ...\n",
      "number of ebias features =11838\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11838,)\n",
      "len of dfs: 11838\n",
      "the vocabulary size =:11838\n",
      "time passed:12692.277971029282\n",
      "DC0:base entropy=-1.5254708020812768, DC0:basete troenpy=-0.27271682706759126\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11838\n",
      "fle max=35.77561929160037\n",
      "normalizing bias features ...\n",
      "number of ebias features =11838\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11838,)\n",
      "len of dfs: 11838\n",
      "the vocabulary size =:11838\n",
      "time passed:12696.072088241577\n",
      "DC0:base entropy=-1.5254708020812768, DC0:basete troenpy=-0.27271682706759126\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11838\n",
      "fle max=4.535396704108907\n",
      "normalizing bias features ...\n",
      "number of ebias features =11838\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11838,)\n",
      "len of dfs: 11838\n",
      "the vocabulary size =:11838\n",
      "time passed:12699.843170166016\n",
      "DC0:base entropy=-1.5254708020812768, DC0:basete troenpy=-0.27271682706759126\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =11838\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 42 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11722,)\n",
      "len of dfs: 11722\n",
      "the vocabulary size =:11722\n",
      "time passed:12703.668533086777\n",
      "DC0:base entropy=-1.5195623531552243, DC0:basete troenpy=-0.2777529849379059\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11722\n",
      "fle max=3.079148087387682\n",
      "normalizing bias features ...\n",
      "number of ebias features =11722\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11722,)\n",
      "len of dfs: 11722\n",
      "the vocabulary size =:11722\n",
      "time passed:12707.51502919197\n",
      "DC0:base entropy=-1.5195623531552243, DC0:basete troenpy=-0.2777529849379059\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11722\n",
      "fle max=10000.65812752644\n",
      "normalizing bias features ...\n",
      "number of ebias features =11722\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11722,)\n",
      "len of dfs: 11722\n",
      "the vocabulary size =:11722\n",
      "time passed:12711.280304908752\n",
      "DC0:base entropy=-1.5195623531552243, DC0:basete troenpy=-0.2777529849379059\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11722\n",
      "fle max=33.77567749709085\n",
      "normalizing bias features ...\n",
      "number of ebias features =11722\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11722,)\n",
      "len of dfs: 11722\n",
      "the vocabulary size =:11722\n",
      "time passed:12715.12530207634\n",
      "DC0:base entropy=-1.5195623531552243, DC0:basete troenpy=-0.2777529849379059\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11722\n",
      "fle max=4.608486325224237\n",
      "normalizing bias features ...\n",
      "number of ebias features =11722\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11722,)\n",
      "len of dfs: 11722\n",
      "the vocabulary size =:11722\n",
      "time passed:12718.88235616684\n",
      "DC0:base entropy=-1.5195623531552243, DC0:basete troenpy=-0.2777529849379059\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =11722\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 43 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11931,)\n",
      "len of dfs: 11931\n",
      "the vocabulary size =:11931\n",
      "time passed:12722.677795171738\n",
      "DC0:base entropy=-1.528878256568257, DC0:basete troenpy=-0.26933571142005935\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11931\n",
      "fle max=3.0924343555719864\n",
      "normalizing bias features ...\n",
      "number of ebias features =11931\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11931,)\n",
      "len of dfs: 11931\n",
      "the vocabulary size =:11931\n",
      "time passed:12726.547547101974\n",
      "DC0:base entropy=-1.528878256568257, DC0:basete troenpy=-0.26933571142005935\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11931\n",
      "fle max=10000.654117100177\n",
      "normalizing bias features ...\n",
      "number of ebias features =11931\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11931,)\n",
      "len of dfs: 11931\n",
      "the vocabulary size =:11931\n",
      "time passed:12730.374758005142\n",
      "DC0:base entropy=-1.528878256568257, DC0:basete troenpy=-0.26933571142005935\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11931\n",
      "fle max=31.786198118120133\n",
      "normalizing bias features ...\n",
      "number of ebias features =11931\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11931,)\n",
      "len of dfs: 11931\n",
      "the vocabulary size =:11931\n",
      "time passed:12734.248878002167\n",
      "DC0:base entropy=-1.528878256568257, DC0:basete troenpy=-0.26933571142005935\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11931\n",
      "fle max=4.57340080462423\n",
      "normalizing bias features ...\n",
      "number of ebias features =11931\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11931,)\n",
      "len of dfs: 11931\n",
      "the vocabulary size =:11931\n",
      "time passed:12738.195489168167\n",
      "DC0:base entropy=-1.528878256568257, DC0:basete troenpy=-0.26933571142005935\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =11931\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 44 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12000,)\n",
      "len of dfs: 12000\n",
      "the vocabulary size =:12000\n",
      "time passed:12742.070183038712\n",
      "DC0:base entropy=-1.5204053921148504, DC0:basete troenpy=-0.2779250898967179\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12000\n",
      "fle max=3.0745931967123337\n",
      "normalizing bias features ...\n",
      "number of ebias features =12000\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12000,)\n",
      "len of dfs: 12000\n",
      "the vocabulary size =:12000\n",
      "time passed:12745.989758014679\n",
      "DC0:base entropy=-1.5204053921148504, DC0:basete troenpy=-0.2779250898967179\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12000\n",
      "fle max=10000.657762581905\n",
      "normalizing bias features ...\n",
      "number of ebias features =12000\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12000,)\n",
      "len of dfs: 12000\n",
      "the vocabulary size =:12000\n",
      "time passed:12749.860138177872\n",
      "DC0:base entropy=-1.5204053921148504, DC0:basete troenpy=-0.2779250898967179\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12000\n",
      "fle max=38.76518663428773\n",
      "normalizing bias features ...\n",
      "number of ebias features =12000\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12000,)\n",
      "len of dfs: 12000\n",
      "the vocabulary size =:12000\n",
      "time passed:12753.726694107056\n",
      "DC0:base entropy=-1.5204053921148504, DC0:basete troenpy=-0.2779250898967179\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =12000\n",
      "fle max=4.5819901831008885\n",
      "normalizing bias features ...\n",
      "number of ebias features =12000\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12000,)\n",
      "len of dfs: 12000\n",
      "the vocabulary size =:12000\n",
      "time passed:12757.576743125916\n",
      "DC0:base entropy=-1.5204053921148504, DC0:basete troenpy=-0.2779250898967179\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =12000\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 45 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12038,)\n",
      "len of dfs: 12038\n",
      "the vocabulary size =:12038\n",
      "time passed:12761.526101112366\n",
      "DC0:base entropy=-1.5234590416865474, DC0:basete troenpy=-0.2736468587596393\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12038\n",
      "fle max=3.083607462842608\n",
      "normalizing bias features ...\n",
      "number of ebias features =12038\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12038,)\n",
      "len of dfs: 12038\n",
      "the vocabulary size =:12038\n",
      "time passed:12765.44720697403\n",
      "DC0:base entropy=-1.5234590416865474, DC0:basete troenpy=-0.2736468587596393\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12038\n",
      "fle max=10000.656444063832\n",
      "normalizing bias features ...\n",
      "number of ebias features =12038\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12038,)\n",
      "len of dfs: 12038\n",
      "the vocabulary size =:12038\n",
      "time passed:12769.409550189972\n",
      "DC0:base entropy=-1.5234590416865474, DC0:basete troenpy=-0.2736468587596393\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12038\n",
      "fle max=35.774772669934755\n",
      "normalizing bias features ...\n",
      "number of ebias features =12038\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12038,)\n",
      "len of dfs: 12038\n",
      "the vocabulary size =:12038\n",
      "time passed:12773.406923055649\n",
      "DC0:base entropy=-1.5234590416865474, DC0:basete troenpy=-0.2736468587596393\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =12038\n",
      "fle max=4.522142100808999\n",
      "normalizing bias features ...\n",
      "number of ebias features =12038\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(12038,)\n",
      "len of dfs: 12038\n",
      "the vocabulary size =:12038\n",
      "time passed:12777.346061229706\n",
      "DC0:base entropy=-1.5234590416865474, DC0:basete troenpy=-0.2736468587596393\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =12038\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 46 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11917,)\n",
      "len of dfs: 11917\n",
      "the vocabulary size =:11917\n",
      "time passed:12781.249681949615\n",
      "DC0:base entropy=-1.5305256637948654, DC0:basete troenpy=-0.26818679601177764\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11917\n",
      "fle max=3.091793644127672\n",
      "normalizing bias features ...\n",
      "number of ebias features =11917\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11917,)\n",
      "len of dfs: 11917\n",
      "the vocabulary size =:11917\n",
      "time passed:12785.097286939621\n",
      "DC0:base entropy=-1.5305256637948654, DC0:basete troenpy=-0.26818679601177764\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11917\n",
      "fle max=10000.65341298415\n",
      "normalizing bias features ...\n",
      "number of ebias features =11917\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11917,)\n",
      "len of dfs: 11917\n",
      "the vocabulary size =:11917\n",
      "time passed:12788.931229114532\n",
      "DC0:base entropy=-1.5305256637948654, DC0:basete troenpy=-0.26818679601177764\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11917\n",
      "fle max=37.774936657625915\n",
      "normalizing bias features ...\n",
      "number of ebias features =11917\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11917,)\n",
      "len of dfs: 11917\n",
      "the vocabulary size =:11917\n",
      "time passed:12792.801361083984\n",
      "DC0:base entropy=-1.5305256637948654, DC0:basete troenpy=-0.26818679601177764\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11917\n",
      "fle max=4.572251889215948\n",
      "normalizing bias features ...\n",
      "number of ebias features =11917\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11917,)\n",
      "len of dfs: 11917\n",
      "the vocabulary size =:11917\n",
      "time passed:12796.719664096832\n",
      "DC0:base entropy=-1.5305256637948654, DC0:basete troenpy=-0.26818679601177764\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =11917\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 47 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11800,)\n",
      "len of dfs: 11800\n",
      "the vocabulary size =:11800\n",
      "time passed:12800.662453174591\n",
      "DC0:base entropy=-1.5192003048055616, DC0:basete troenpy=-0.27686359651527404\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11800\n",
      "fle max=3.0810442823635165\n",
      "normalizing bias features ...\n",
      "number of ebias features =11800\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11800,)\n",
      "len of dfs: 11800\n",
      "the vocabulary size =:11800\n",
      "time passed:12804.516138076782\n",
      "DC0:base entropy=-1.5192003048055616, DC0:basete troenpy=-0.27686359651527404\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11800\n",
      "fle max=10000.658284378482\n",
      "normalizing bias features ...\n",
      "number of ebias features =11800\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11800,)\n",
      "len of dfs: 11800\n",
      "the vocabulary size =:11800\n",
      "time passed:12808.330977201462\n",
      "DC0:base entropy=-1.5192003048055616, DC0:basete troenpy=-0.27686359651527404\n",
      "generating FLE troenpy, bias features ... for each tf\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of fle =11800\n",
      "fle max=31.780120232092106\n",
      "normalizing bias features ...\n",
      "number of ebias features =11800\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11800,)\n",
      "len of dfs: 11800\n",
      "the vocabulary size =:11800\n",
      "time passed:12812.15000295639\n",
      "DC0:base entropy=-1.5192003048055616, DC0:basete troenpy=-0.27686359651527404\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11800\n",
      "fle max=4.53954347355659\n",
      "normalizing bias features ...\n",
      "number of ebias features =11800\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11800,)\n",
      "len of dfs: 11800\n",
      "the vocabulary size =:11800\n",
      "time passed:12816.155957221985\n",
      "DC0:base entropy=-1.5192003048055616, DC0:basete troenpy=-0.27686359651527404\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =11800\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 48 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11833,)\n",
      "len of dfs: 11833\n",
      "the vocabulary size =:11833\n",
      "time passed:12820.463713169098\n",
      "DC0:base entropy=-1.5159946051058868, DC0:basete troenpy=-0.2787871148144445\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11833\n",
      "fle max=3.08584714894669\n",
      "normalizing bias features ...\n",
      "number of ebias features =11833\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11833,)\n",
      "len of dfs: 11833\n",
      "the vocabulary size =:11833\n",
      "time passed:12824.68592596054\n",
      "DC0:base entropy=-1.5159946051058868, DC0:basete troenpy=-0.2787871148144445\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11833\n",
      "fle max=10000.65967646869\n",
      "normalizing bias features ...\n",
      "number of ebias features =11833\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11833,)\n",
      "len of dfs: 11833\n",
      "the vocabulary size =:11833\n",
      "time passed:12828.665209054947\n",
      "DC0:base entropy=-1.5159946051058868, DC0:basete troenpy=-0.2787871148144445\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11833\n",
      "fle max=32.77653649054799\n",
      "normalizing bias features ...\n",
      "number of ebias features =11833\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11833,)\n",
      "len of dfs: 11833\n",
      "the vocabulary size =:11833\n",
      "time passed:12832.753736019135\n",
      "DC0:base entropy=-1.5159946051058868, DC0:basete troenpy=-0.2787871148144445\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11833\n",
      "fle max=4.660813749488326\n",
      "normalizing bias features ...\n",
      "number of ebias features =11833\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11833,)\n",
      "len of dfs: 11833\n",
      "the vocabulary size =:11833\n",
      "time passed:12837.010905981064\n",
      "DC0:base entropy=-1.5159946051058868, DC0:basete troenpy=-0.2787871148144445\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =11833\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      " ... 49 sampling ....\n",
      "/Users/arthur/work/pn2022/PN/data/knn/all_bbcsport_by_line.txt\n",
      "class_cricket--->0\n",
      "class_football--->1\n",
      "class_rugby--->2\n",
      "class_tennis--->3\n",
      "class_athletics--->4\n",
      "737\n",
      "147\n",
      "590\n",
      "0--->0\n",
      "1--->1\n",
      "2--->2\n",
      "3--->3\n",
      "4--->4\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11708,)\n",
      "len of dfs: 11708\n",
      "the vocabulary size =:11708\n",
      "time passed:12841.183119058609\n",
      "DC0:base entropy=-1.5190630361036654, DC0:basete troenpy=-0.2780025340527394\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11708\n",
      "fle max=3.0802393372159784\n",
      "normalizing bias features ...\n",
      "number of ebias features =11708\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11708,)\n",
      "len of dfs: 11708\n",
      "the vocabulary size =:11708\n",
      "time passed:12845.153935909271\n",
      "DC0:base entropy=-1.5190630361036654, DC0:basete troenpy=-0.2780025340527394\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11708\n",
      "fle max=10000.658343867646\n",
      "normalizing bias features ...\n",
      "number of ebias features =11708\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11708,)\n",
      "len of dfs: 11708\n",
      "the vocabulary size =:11708\n",
      "time passed:12849.19766998291\n",
      "DC0:base entropy=-1.5190630361036654, DC0:basete troenpy=-0.2780025340527394\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11708\n",
      "fle max=37.7671521288339\n",
      "normalizing bias features ...\n",
      "number of ebias features =11708\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11708,)\n",
      "len of dfs: 11708\n",
      "the vocabulary size =:11708\n",
      "time passed:12853.229105949402\n",
      "DC0:base entropy=-1.5190630361036654, DC0:basete troenpy=-0.2780025340527394\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =11708\n",
      "fle max=4.647450386519761\n",
      "normalizing bias features ...\n",
      "number of ebias features =11708\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "initiating entropy square classifier ...\n",
      "it computes feature vs label entropy as weight, and multiply with tf-idf.\n",
      "there are 590 docs.\n",
      "shape of dfs:(11708,)\n",
      "len of dfs: 11708\n",
      "the vocabulary size =:11708\n",
      "time passed:12857.324936151505\n",
      "DC0:base entropy=-1.5190630361036654, DC0:basete troenpy=-0.2780025340527394\n",
      "generating FLE troenpy, bias features ... for each tf\n",
      "number of fle =0\n",
      "normalizing bias features ...\n",
      "number of ebias features =11708\n",
      "computing fidf ... \n",
      "fitting KNN .... using TF features...\n",
      "evaluating ... \n",
      "knn predicting ...\n",
      "(0.011700680272108856, 0.007701209087393154)\n",
      "(0.01034013605442178, 0.008842490780556158)\n",
      "(0.03333333333333336, 0.012111900554261805)\n",
      "(0.21074829931972786, 0.04034856687663272)\n"
     ]
    },
    {
     "ename": "AttributeError",
     "evalue": "'DataFrame' object has no attribute 'zcf'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
      "\u001b[0;32m/var/folders/pg/88t62z716tx4bc3mxmsldm5r0000gn/T/ipykernel_51030/3284541760.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      8\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mncf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mncf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      9\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrncf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrncf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 10\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mzcf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mzcf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m   5987\u001b[0m         ):\n\u001b[1;32m   5988\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 5989\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__getattribute__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   5990\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   5991\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m__setattr__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mAttributeError\u001b[0m: 'DataFrame' object has no attribute 'zcf'"
     ]
    }
   ],
   "source": [
    "#df=dfamazon\n",
    "dfbbc=repeatSampling_knn(bbcfn, Nsample=50)\n",
    "df=dfbbc\n",
    "#df=dftw\n",
    "#df=dfclass\n",
    "print((np.mean(df.idf), np.std(df.idf)))\n",
    "print((np.mean(df.it), np.std(df.it)))\n",
    "print((np.mean(df.ncf), np.std(df.ncf)))\n",
    "print((np.mean(df.rncf), np.std(df.rncf)))\n",
    "print((np.mean(df.zcf), np.std(df.zcf)))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "9a3a846d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# print((np.mean(df.idf), np.std(df.idf)))\n",
    "# print((np.mean(df.it), np.std(df.it)))\n",
    "# print((np.mean(df.ncf), np.std(df.ncf)))\n",
    "# print((np.mean(df.rncf), np.std(df.rncf)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1e592d2b",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8508df4e",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8bdc44c9",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "5169aa1d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(0.21510204081632656, 0.028761595838190632)\n"
     ]
    }
   ],
   "source": [
    "#df=dfamazon\n",
    "df=dfbbc\n",
    "#df=dftw\n",
    "#df=dfclass\n",
    "# print((np.mean(df.idf), np.std(df.idf)))\n",
    "# print((np.mean(df.it), np.std(df.it)))\n",
    "# print((np.mean(df.ncf), np.std(df.ncf)))\n",
    "print((np.mean(df.rncf), np.std(df.rncf)))\n",
    "# print((np.mean(df.zcf), np.std(df.zcf)))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "00f44e48",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "e9f757e3",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(0.10881249999999999, 0.007913842698082894)\n"
     ]
    }
   ],
   "source": [
    "df=dfamazon\n",
    "#df=dfbbc\n",
    "#df=dftw\n",
    "#df=dfclass\n",
    "# print((np.mean(df.idf), np.std(df.idf)))\n",
    "# print((np.mean(df.it), np.std(df.it)))\n",
    "# print((np.mean(df.ncf), np.std(df.ncf)))\n",
    "print((np.mean(df.rncf), np.std(df.rncf)))\n",
    "# print((np.mean(df.zcf), np.std(df.zcf)))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "a4cf0365",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(0.045736434108527124, 0.004991598609433345)\n"
     ]
    }
   ],
   "source": [
    "#df=dfamazon\n",
    "#df=dfbbc\n",
    "#df=dftw\n",
    "df=dfclass\n",
    "# print((np.mean(df.idf), np.std(df.idf)))\n",
    "# print((np.mean(df.it), np.std(df.it)))\n",
    "# print((np.mean(df.ncf), np.std(df.ncf)))\n",
    "print((np.mean(df.rncf), np.std(df.rncf)))\n",
    "# print((np.mean(df.zcf), np.std(df.zcf)))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "661a0dbc",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "0a86b81d",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "## for the sampling 4 datasets\n",
    "## utility function for the 4 sampling datasets\n",
    "datanmlist=[bbcfn,twfn,amazonfn,classfn]\n",
    "def getsampledata(datanm=datanmlist[0]):\n",
    "    #print(datanm)\n",
    "    dat,Y=getdatafromFile(datanm)\n",
    "    #(dat,Y)=getdatafromFile(classfn)\n",
    "    Labdic=getLabDic(Y)\n",
    "    Y2=[Labdic[e] for e in Y]\n",
    "    #(traindat, train_lab, testdat, test_lab, labdic)=datasplit(dat,Y2)\n",
    "    return datasplit(dat,Y2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "65cf973b",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "897dba1a",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3779d10c",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4d5687b8",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "f55f36d7",
   "metadata": {},
   "outputs": [],
   "source": [
    "def repeatSampling_knn_LR(filenm, Nsample=10):\n",
    "    idf=[]\n",
    "    it=[]## t: troenpy, pcf , pi\n",
    "    incf11=[] ## idf*entropy, e1-e_1\n",
    "    incf10=[] ## e1-e0\n",
    "    incf0_1=[] ## e0-e_1\n",
    "    ##LR\n",
    "    idf_LR=[]\n",
    "    it_LR=[]## t: troenpy, pcf , pi\n",
    "    incf11_LR=[] ## idf*entropy, e1-e_1\n",
    "    incf10_LR=[] ## e1-e0\n",
    "    incf0_1_LR=[] ## e0-e_1\n",
    "    \n",
    "    for i in range(Nsample):\n",
    "        ## get method idferr\n",
    "        print(f\" ... {i} sampling ....\")\n",
    "        traindat, train_lab, testdat, test_lab, labdic=getsampledata(filenm)\n",
    "        \n",
    "        e2=troenpy();\n",
    "        e2.train(traindat, train_lab, \"KNN\", useidf=True, useBTF=False, usewt4BTF=False, usetf=True, usebiasfea=False, usebiaswt=False, stopwordlist=stop_words)\n",
    "        err=e2.evaluate(testdat, test_lab, predModel=\"KNN\")\n",
    "        idf.append(err)\n",
    "        \n",
    "        e2=troenpy();\n",
    "        e2.train(traindat, train_lab, \"KNN\", tunec=100, usepcf=True, useidf=False, useBTF=False, usewt4BTF=False, usetf=True, usebiasfea=False, usebiaswt=False, stopwordlist=stop_words)\n",
    "        error=e2.evaluate(testdat, test_lab, predModel=\"KNN\")\n",
    "        it.append(error)\n",
    "        \n",
    "        #ncf11\n",
    "        e2=troenpy();\n",
    "        e2.train(traindat, train_lab, \"KNN\", tunec=100, usencf=True, useidf=False, useBTF=False, usewt4BTF=False, usetf=True, usebiasfea=False, usebiaswt=False, stopwordlist=stop_words)\n",
    "        error=e2.evaluate(testdat, test_lab, predModel=\"KNN\")\n",
    "        incf11.append(error)\n",
    "        \n",
    "        ##ncf10\n",
    "        e2=troenpy();\n",
    "        e2.train(traindat, train_lab, \"KNN\", tunec=100, usezcf=True, useidf=False, useBTF=False, usewt4BTF=False, usetf=True, usebiasfea=False, usebiaswt=False, stopwordlist=stop_words)\n",
    "        error=e2.evaluate(testdat, test_lab, predModel=\"KNN\")\n",
    "        incf10.append(error)\n",
    "        \n",
    "        ##ncf0_1\n",
    "        e2=troenpy();\n",
    "        e2.train(traindat, train_lab, \"KNN\", tunec=100, usencf0_1=True, useidf=False, useBTF=False, usewt4BTF=False, usetf=True, usebiasfea=False, usebiaswt=False, stopwordlist=stop_words)\n",
    "        error=e2.evaluate(testdat, test_lab, predModel=\"KNN\")\n",
    "        incf0_1.append(error)\n",
    "        \n",
    "        ### LR\n",
    "        print(\"logistic regression\")\n",
    "        e2=troenpy();\n",
    "        e2.train(traindat, train_lab, \"LR\", useidf=True, useBTF=False, usewt4BTF=False, usetf=True, usebiasfea=False, usebiaswt=False, stopwordlist=stop_words)\n",
    "        err=e2.evaluate(testdat, test_lab, predModel=\"LR\")\n",
    "        idf_LR.append(err)\n",
    "        \n",
    "        e2=troenpy();\n",
    "        e2.train(traindat, train_lab, \"LR\", tunec=100, usepcf=True, useidf=False, useBTF=False, usewt4BTF=False, usetf=True, usebiasfea=False, usebiaswt=False, stopwordlist=stop_words)\n",
    "        error=e2.evaluate(testdat, test_lab, predModel=\"LR\")\n",
    "        it_LR.append(error)\n",
    "        \n",
    "        #ncf11\n",
    "        e2=troenpy();\n",
    "        e2.train(traindat, train_lab, \"LR\", tunec=100, usencf=True, useidf=False, useBTF=False, usewt4BTF=False, usetf=True, usebiasfea=False, usebiaswt=False, stopwordlist=stop_words)\n",
    "        error=e2.evaluate(testdat, test_lab, predModel=\"LR\")\n",
    "        incf11_LR.append(error)\n",
    "        \n",
    "        ##ncf10\n",
    "        e2=troenpy();\n",
    "        e2.train(traindat, train_lab, \"LR\", tunec=100, usezcf=True, useidf=False, useBTF=False, usewt4BTF=False, usetf=True, usebiasfea=False, usebiaswt=False, stopwordlist=stop_words)\n",
    "        error=e2.evaluate(testdat, test_lab, predModel=\"LR\")\n",
    "        incf10_LR.append(error)\n",
    "        \n",
    "        ##ncf0_1\n",
    "        e2=troenpy();\n",
    "        e2.train(traindat, train_lab, \"LR\", tunec=100, usencf0_1=True, useidf=False, useBTF=False, usewt4BTF=False, usetf=True, usebiasfea=False, usebiaswt=False, stopwordlist=stop_words)\n",
    "        error=e2.evaluate(testdat, test_lab, predModel=\"LR\")\n",
    "        incf0_1_LR.append(error)\n",
    "        \n",
    "    #res=[]\n",
    "    #res.append((np.mean(idferr), np.std(idferr)))\n",
    "    result={\"idf\":idf, 'it':it, 'incf11':incf11, 'incf10':incf10, 'incf0_1':incf0_1, \n",
    "            \"idf_LR\":idf_LR, 'it_LR':it_LR, 'incf11_LR':incf11_LR, 'incf10_LR':incf10_LR, 'incf0_1_LR':incf0_1_LR}\n",
    "    df=pd.DataFrame(result)\n",
    "    return df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "773d336b",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "318628a5",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b165997d",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "031cf520",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 186,
   "id": "87e9ee7c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# datanmlist=[bbcfn,twfn,amazonfn,classfn]\n",
    "# dfbbc=repeatSampling(bbcfn, Nsample=100)\n",
    "# dfamazon=repeatSampling(amazonfn, Nsample=100)\n",
    "# dftw=repeatSampling(twfn, Nsample=100)\n",
    "# dfclass=repeatSampling(classfn, Nsample=100)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 210,
   "id": "d4890498",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(0.04867277425416961, 0.004620929971693432)\n",
      "(0.03782006107587503, 0.004658038828450234)\n",
      "(0.03782006107587503, 0.004658038828450234)\n",
      "(0.03782006107587503, 0.004658038828450234)\n",
      "(0.03782006107587503, 0.004658038828450234)\n",
      "LR\n",
      "(0.02978623443739722, 0.004378105804922443)\n",
      "(0.02692036645525017, 0.004086575377586589)\n",
      "(0.02692036645525017, 0.004086575377586589)\n",
      "(0.02692036645525017, 0.004086575377586589)\n",
      "(0.02692036645525017, 0.004086575377586589)\n"
     ]
    }
   ],
   "source": [
    "df=dfclass\n",
    "print((np.mean(df.idf), np.std(df.idf)))\n",
    "print((np.mean(df.it), np.std(df.it)))\n",
    "print((np.mean(df.incf11), np.std(df.incf11)))\n",
    "print((np.mean(df.incf10), np.std(df.incf10)))\n",
    "print((np.mean(df.incf0_1), np.std(df.incf0_1)))\n",
    "##LR\n",
    "print(\"LR\")\n",
    "print((np.mean(df.idf_LR), np.std(df.idf_LR)))\n",
    "print((np.mean(df.it_LR), np.std(df.it_LR)))\n",
    "print((np.mean(df.incf11_LR), np.std(df.incf11_LR)))\n",
    "print((np.mean(df.incf10_LR), np.std(df.incf10_LR)))\n",
    "print((np.mean(df.incf0_1_LR), np.std(df.incf0_1_LR)))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 211,
   "id": "08e2c4e2",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(0.1156875, 0.007385144858656005)\n",
      "(0.06514583333333335, 0.005817724653065744)\n",
      "(0.06514583333333335, 0.005817724653065744)\n",
      "(0.06514583333333335, 0.005817724653065744)\n",
      "(0.06514583333333335, 0.005817724653065744)\n",
      "LR\n",
      "(0.05114583333333334, 0.0051135118531418534)\n",
      "(0.04891666666666668, 0.005193980222869119)\n",
      "(0.04891666666666668, 0.005193980222869119)\n",
      "(0.04891666666666668, 0.005193980222869119)\n",
      "(0.04891666666666668, 0.005193980222869119)\n"
     ]
    }
   ],
   "source": [
    "df=dfamazon\n",
    "print((np.mean(df.idf), np.std(df.idf)))\n",
    "print((np.mean(df.it), np.std(df.it)))\n",
    "print((np.mean(df.incf11), np.std(df.incf11)))\n",
    "print((np.mean(df.incf10), np.std(df.incf10)))\n",
    "print((np.mean(df.incf0_1), np.std(df.incf0_1)))\n",
    "##LR\n",
    "print(\"LR\")\n",
    "print((np.mean(df.idf_LR), np.std(df.idf_LR)))\n",
    "print((np.mean(df.it_LR), np.std(df.it_LR)))\n",
    "print((np.mean(df.incf11_LR), np.std(df.incf11_LR)))\n",
    "print((np.mean(df.incf10_LR), np.std(df.incf10_LR)))\n",
    "print((np.mean(df.incf0_1_LR), np.std(df.incf0_1_LR)))\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2e35b33c",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 209,
   "id": "7631caae",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(0.28694489031567694, 0.015422030448859553)\n",
      "(0.2804708400214018, 0.017641578077045597)\n",
      "(0.2804708400214018, 0.017641578077045597)\n",
      "(0.2804708400214018, 0.017641578077045597)\n",
      "(0.2804708400214018, 0.017641578077045597)\n",
      "LR\n",
      "(0.2491706795077582, 0.01600143831737841)\n",
      "(0.26960941680042805, 0.014719975335885073)\n",
      "(0.26960941680042805, 0.014719975335885073)\n",
      "(0.26960941680042805, 0.014719975335885073)\n",
      "(0.26960941680042805, 0.014719975335885073)\n"
     ]
    }
   ],
   "source": [
    "df=dftw\n",
    "print((np.mean(df.idf), np.std(df.idf)))\n",
    "print((np.mean(df.it), np.std(df.it)))\n",
    "print((np.mean(df.incf11), np.std(df.incf11)))\n",
    "print((np.mean(df.incf10), np.std(df.incf10)))\n",
    "print((np.mean(df.incf0_1), np.std(df.incf0_1)))\n",
    "##LR\n",
    "print(\"LR\")\n",
    "print((np.mean(df.idf_LR), np.std(df.idf_LR)))\n",
    "print((np.mean(df.it_LR), np.std(df.it_LR)))\n",
    "print((np.mean(df.incf11_LR), np.std(df.incf11_LR)))\n",
    "print((np.mean(df.incf10_LR), np.std(df.incf10_LR)))\n",
    "print((np.mean(df.incf0_1_LR), np.std(df.incf0_1_LR)))\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 208,
   "id": "f415fe08",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(0.011111111111111124, 0.009374161727253702)\n",
      "(0.012018140589569175, 0.008178997186789775)\n",
      "(0.012018140589569175, 0.008178997186789775)\n",
      "(0.012018140589569175, 0.008178997186789775)\n",
      "(0.012018140589569175, 0.008178997186789775)\n",
      "(0.008616780045351484, 0.00803630165381831)\n",
      "(0.0022675736961451274, 0.004756502712789809)\n",
      "(0.0022675736961451274, 0.004756502712789809)\n",
      "(0.0022675736961451274, 0.004756502712789809)\n",
      "(0.0022675736961451274, 0.004756502712789809)\n"
     ]
    }
   ],
   "source": [
    "df=dfbbc\n",
    "print((np.mean(df.idf), np.std(df.idf)))\n",
    "print((np.mean(df.it), np.std(df.it)))\n",
    "print((np.mean(df.incf11), np.std(df.incf11)))\n",
    "print((np.mean(df.incf10), np.std(df.incf10)))\n",
    "print((np.mean(df.incf0_1), np.std(df.incf0_1)))\n",
    "##LR\n",
    "print((np.mean(df.idf_LR), np.std(df.idf_LR)))\n",
    "print((np.mean(df.it_LR), np.std(df.it_LR)))\n",
    "print((np.mean(df.incf11_LR), np.std(df.incf11_LR)))\n",
    "print((np.mean(df.incf10_LR), np.std(df.incf10_LR)))\n",
    "print((np.mean(df.incf0_1_LR), np.std(df.incf0_1_LR)))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5b33e911",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0b803183",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1f3de3a7",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "6bb50f7e",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "\n",
    "class troenpy:\n",
    "    def __init__(self):\n",
    "        #self.name=name\n",
    "        print(\"initiating entropy square classifier ...\")\n",
    "        print(\"it computes feature vs label entropy as weight, and multiply with tf-idf.\")\n",
    "        self.vect=None\n",
    "        self.dfs=None\n",
    "        self.tfx=None\n",
    "        self.btfx=None\n",
    "        # feature Label entropy, measure the uncertainty of a feature in docs where the feature presents.\n",
    "        self.fle=None\n",
    "        ## original fle is pcf10 using Dc1 and Dc0\n",
    "        ## pcf using Troenpy\n",
    "        ## pcf using Dc1 and Dc_{-1}; ncf==entropy using Dc1 and DC_{-1}\n",
    "        self.pcf11=None\n",
    "        self.pcf10=None\n",
    "        self.ncf11=None\n",
    "        self.ncf10=None\n",
    "        self.nege=None ## negentropy\n",
    "        \n",
    "        self.ppf=None\n",
    "        ## inverse class freq\n",
    "        self.icf=None\n",
    "        self.idf=None\n",
    "        self.idfs_diag=None\n",
    "        self.tfidf=None\n",
    "        ## class term freq volatility/cv/coeff of variation\n",
    "        self.ctfv=None\n",
    "        ### term-class importance bias weight matrix, each term t has all n_class biases\n",
    "        ### change n_classes biases to only expected (e,t) 2 clolumns across classes instead\n",
    "        self.tcbiasmat=None\n",
    "        ### add bias features\n",
    "        self.usebiasfea=False\n",
    "        self.usebiaswt=False\n",
    "        ## expected class bias across classes\n",
    "        self.tcebiasmat=None\n",
    "        ## product of idf*fle\n",
    "        self.fidfs=None\n",
    "        self.fidfs_diag=None\n",
    "        self.ylabs=None\n",
    "        ## N: number of train docs; m: # of words; n_classes: uniq labels in the traindocs \n",
    "        self.N=None\n",
    "        self.M=None\n",
    "        self.n_classes=None\n",
    "        ## the class count vector/dic recording docs counts for each class, eg.[n1,n2,...,nk]\n",
    "        self.ccdic=None\n",
    "        self.nnn=0\n",
    "        ### support KNN, (LR) Logistic Regression\n",
    "        self.predModel=None\n",
    "        self.lrmodel=None\n",
    "        self.useBTF=False\n",
    "        self.usewt4BTF=False\n",
    "        self.wt4btfknn=None\n",
    "        ## use basic term freq feature, default to be true 12/31/2021\n",
    "        self.usetf=True\n",
    "        self.biastfx=None ## for KNN\n",
    "        self.useidf=False\n",
    "        return\n",
    "    \n",
    "    def train(self, traindocs, trainlab, predModel=\"KNN\", tunec=10, usenege=False, usepcf=False, usencf=False,userncf=False, \n",
    "              usencf01=False, useidf=False, useBTF=False, usewt4BTF=False, \n",
    "              usebiasfea=False, usebiaswt=False, usetf=True, stopwordlist=None):\n",
    "        #self.vect = CountVectorizer(ngram_range=(1,1), stop_words=None)\n",
    "        self.vect = CountVectorizer(ngram_range=(1,1), stop_words=stopwordlist)\n",
    "        self.N=len(traindocs)\n",
    "        print(f\"there are { self.N } docs.\")\n",
    "        self.predModel=predModel\n",
    "        self.usepcf=usepcf\n",
    "        self.usencf=usencf\n",
    "        self.useBTF=useBTF\n",
    "        self.usencf01=usencf01\n",
    "        self.usewt4BTF=usewt4BTF\n",
    "        self.usebiasfea=usebiasfea\n",
    "        self.usebiaswt=usebiaswt\n",
    "        self.useidf=useidf\n",
    "        self.usepcf=usepcf\n",
    "        self.usetf=usetf\n",
    "        self.usenege=usenege\n",
    "        # using 1/entropy(labels)\n",
    "        self.userncf=userncf\n",
    "        self.tfx=self.vect.fit_transform(traindocs)\n",
    "        self.dfs=csr_matrix(((self.tfx>0)*1).sum(axis=0)).toarray()[0]\n",
    "        print(\"shape of dfs:\"+str(self.dfs.shape))\n",
    "        print(\"len of dfs: \"+str(len(self.dfs)))\n",
    "        ### tfx1 matrix indicating existence of a word or not (1/0) for tfx0 purposes\n",
    "        # tfx1=sp.csr_matrix((tfx>0)*1)\n",
    "        ## l1 normalize to frequency for each row/doc\n",
    "        #self.tfx=csr_matrix(self.tfx/self.tfx.sum(axis=1))\n",
    "        self.btfx=(self.tfx>0)*1\n",
    "        self.M=len(self.vect.get_feature_names_out())\n",
    "        print(f\"the vocabulary size =:{self.M}\")\n",
    "        print(\"time passed:\"+ str(time.time()-t_s))\n",
    "        \n",
    "        self.ylabs=array(trainlab)\n",
    "        self.n_classes = len(np.unique(self.ylabs))\n",
    "        self.ccdic=Counter(self.ylabs)\n",
    "        ### max entropy \n",
    "        maxe=np.log(self.n_classes)\n",
    "        ### computing the Troenpy(c_i/N) of the class label distribution\n",
    "        #clas=Counter(self.ylabs)\n",
    "        L=len(self.ylabs)\n",
    "        basete=0\n",
    "        basee=0\n",
    "        for k,v in self.ccdic.items():\n",
    "            basete+=-(v/L)*np.log(L/(1+L-v))\n",
    "            basee+=-(v/L)*np.log(L/(1+v))\n",
    "        ## basete=T(Dc0); basee=E(Dc0)\n",
    "        print(f\"DC0:base entropy={basee}, DC0:basete troenpy={basete}\")\n",
    "        print(\"generating FLE troenpy, bias features ... for each tf\")\n",
    "        nege0=maxe-basee  ## negentropy of DC*\n",
    "        ## computing fle, tcbias\n",
    "        self.fle=[]\n",
    "        self.icf=[]\n",
    "        self.tcbias=[]\n",
    "        ## class specific troenpy\n",
    "        self.cfle=[]\n",
    "        for i in range(self.M):\n",
    "            fx=self.tfx[:,i].toarray()[:,0]\n",
    "            ## DC_{-1} docs without w_i\n",
    "            ys=self.ylabs[fx==0]\n",
    "            e_1,t_1=self.comp_TE_pair(ys)\n",
    "            ## DC_1 docs with w_i\n",
    "            ys=self.ylabs[fx>0]\n",
    "            #fx1=fx[fx>0]\n",
    "            e1,t1=self.comp_TE_pair(ys)\n",
    "            ## word/term present docs count dic\n",
    "            fc=Counter(ys)\n",
    "            ncc=len(fc.items()) ## number of present classes with the current term\n",
    "            ###\n",
    "            nf=len(ys) ## same as D, the totoal # of doc with present term t\n",
    "            biase=0\n",
    "            biaste=0\n",
    "            for t,tc in self.ccdic.items():\n",
    "                #expected CIB bias.append(np.log(tc/(1+fc[t]))-np.log((self.N-tc)/(1+nf-fc[t])))\n",
    "                biase+=(tc/self.N)*(np.log(tc/(1+fc[t]))-np.log((self.N-tc)/(1+nf-fc[t])))\n",
    "                biaste+=(tc/self.N)*(np.log(tc/(1+tc-fc[t]))-np.log((self.N-tc)/(1+self.N-tc-nf+fc[t])))\n",
    "            \n",
    "            #expected bias across classes\n",
    "            ebias=[biase, biaste]\n",
    "            ebias=np.array(ebias).reshape(2,1)\n",
    "            ## put in matrix\n",
    "            if self.tcebiasmat is None:\n",
    "                self.tcebiasmat=ebias\n",
    "            else:\n",
    "                self.tcebiasmat=np.hstack((self.tcebiasmat, ebias))\n",
    "            ##pcf11\n",
    "            if usepcf:\n",
    "                self.fle.append(t1-basete)\n",
    "            ##ncf11\n",
    "            if usencf:\n",
    "                self.fle.append(e1-basee)\n",
    "            if userncf:\n",
    "                self.fle.append(1.0/(0.0001+e1)-1.0/(0.0001+basee))\n",
    "            if usenege:\n",
    "                nege1=np.log(ncc)-e1\n",
    "                self.fle.append(nege1-nege0)\n",
    "            \n",
    "        \n",
    "        print(f'number of fle ={len(self.fle)}')\n",
    "        if len(self.fle)>0:\n",
    "            print(f\"fle max={max(self.fle)}\")\n",
    "        print(\"normalizing bias features ...\")\n",
    "        print(f\"number of ebias features ={self.tcebiasmat.shape[1]}\")\n",
    "        print(\"computing fidf ... \")\n",
    "        self.idfs=[1+np.log(self.N/(1+d)) for d in self.dfs]\n",
    "        ## useidf: use idf ony no other weighting\n",
    "        if self.useidf:\n",
    "            self.fidfs=self.idfs\n",
    "        else:\n",
    "            ## default idf*troenpy(fle)\n",
    "            self.fidfs=[d*f for d,f in zip(self.idfs, self.fle)]\n",
    "        self.fidfs_diag=diags(self.fidfs)\n",
    "        self.fle_diag=diags(self.fle)\n",
    "        self.idfs_diag=diags(self.idfs) \n",
    "        #btf dist\n",
    "        #Btfdist=BTFdist(tfx, tfy, idfs_diag, usedf)\n",
    "        #tfidf dist\n",
    "        self.tfx=normalize(self.tfx, norm='l1',axis=1)\n",
    "        if self.usebiasfea:\n",
    "            temp=self.tfx\n",
    "            if self.usebiaswt:\n",
    "                temp=self.tfx.dot(self.fle_diag)\n",
    "            self.biastfx=self.biastfxgenerator(temp, self.tcebiasmat)\n",
    "            self.biastfx=normalize(self.biastfx, norm='l2', axis=1)\n",
    "\n",
    "        self.tfx=self.tfx.dot(self.fidfs_diag)\n",
    "        self.tfx=normalize(self.tfx, norm='l2',axis=1)\n",
    "        if self.usewt4BTF:\n",
    "            #self.btfx=self.btfx.dot(self.fidfs_diag)\n",
    "            self.btfx=self.btfx.dot(self.fle_diag)\n",
    "        self.btfx=normalize(self.btfx, norm='l2',axis=1)\n",
    "        \n",
    "        if predModel=='KNN':\n",
    "            print(\"fitting KNN .... using TF features...\")\n",
    "            #self.btfx=normalize(self.btfx, norm='l2',axis=1)\n",
    "            return\n",
    "        \n",
    "        \n",
    "        print(\"fittting the Logistic Model ... \")\n",
    "        ## here tfx is the raw doc freq with suitable zdf weighting\n",
    "        #self.lrmodel=LR(multi_class='ovr', solver=\"liblinear\", random_state=0)\n",
    "        self.lrmodel=LR(multi_class='ovr', C=tunec, solver=\"liblinear\", random_state=0)\n",
    "        #self.lrmodel=LR(penalty='elasticnet', l1_ratio=0.05, C=10, multi_class='ovr', solver=\"saga\", random_state=0)\n",
    "        #self.lrmodel=LR(penalty='elasticnet', l1_ratio=0.05, C=10, multi_class='ovr', solver=\"saga\", random_state=0)\n",
    "        self.tfx=normalize(self.tfx, norm='l2',axis=1)\n",
    "        if self.usetf:\n",
    "            lrtfx=self.tfx\n",
    "        else:\n",
    "            lrtfx=None\n",
    "        if self.useBTF:\n",
    "            #self.tfx=normalize(self.tfx, norm='l2',axis=1)\n",
    "            self.btfx=normalize(self.btfx, norm='l2',axis=1)\n",
    "            if self.usetf:\n",
    "                lrtfx=sp.hstack((self.tfx, self.btfx), format='csr')\n",
    "            else:\n",
    "                lrtfx=self.btfx\n",
    "        if self.usebiasfea:\n",
    "            #self.biastfx=normalize(self.biastfx, norm='l1', axis=1)\n",
    "            if lrtfx is not None:\n",
    "                lrtfx=sp.hstack((lrtfx, self.biastfx), format='csr')\n",
    "            else:\n",
    "                lrtfx=self.biastfx\n",
    "        \n",
    "        self.lrmodel.fit(lrtfx, self.ylabs)\n",
    "        ## normalize for KNN model purpose\n",
    "        #self.tfx=normalize(self.tfx, norm='l2',axis=1)\n",
    "        self.btfx=normalize(self.btfx, norm='l2',axis=1)\n",
    "        print('done.')\n",
    "        return\n",
    "    \n",
    "    def comp_TE_pair(self,ys):\n",
    "        \"\"\"Return ``[enp, troep]`` computed from the class label vector ``ys``.\n",
    "\n",
    "        With ``nf = len(ys)`` and ``c`` the number of entries carrying one label, every\n",
    "        distinct label adds ``(c/nf)*log(nf/(1+c))`` to ``enp`` and\n",
    "        ``(c/nf)*log(nf/(1+nf-c))`` to ``troep``. An empty ``ys`` yields ``[0, 0]``.\n",
    "        \"\"\"\n",
    "        total=len(ys)*1.0\n",
    "        entropy_part=0\n",
    "        troenpy_part=0\n",
    "        ## single pass over the per-class counts\n",
    "        for count in Counter(ys).values():\n",
    "            share=count/total\n",
    "            entropy_part+=share*np.log(total/(1+count))\n",
    "            troenpy_part+=share*np.log(total/(1+total-count))\n",
    "        return [entropy_part,troenpy_part]\n",
    "    \n",
    "    def comp_gini(self,ys):\n",
    "        \"\"\"Return the Gini impurity ``1 - sum(p_c**2)`` of the label vector ``ys``.\n",
    "\n",
    "        ``p_c`` is the fraction of entries of ``ys`` carrying label ``c``. An empty ``ys``\n",
    "        returns 1.\n",
    "        \"\"\"\n",
    "        total=len(ys)\n",
    "        impurity=1\n",
    "        for count in Counter(ys).values():\n",
    "            share=count/total\n",
    "            impurity-=share*share\n",
    "        return impurity\n",
    "    \n",
    "    def comp_tropy(self,ys):\n",
    "        \"\"\"Return ``1 + sum(p_c/(1-p_c))`` over the distinct labels of ``ys``.\n",
    "\n",
    "        ``p_c`` is the fraction of entries of ``ys`` carrying label ``c``.\n",
    "        An empty ``ys`` returns 1. When ``ys`` holds a single class, ``p_c`` is 1 and the\n",
    "        odds term diverges, so ``inf`` is returned.\n",
    "        \"\"\"\n",
    "        fc=Counter(ys)\n",
    "        nf=len(ys)\n",
    "        ## one class only: p/(1-p) would divide by zero, report the limit instead\n",
    "        if len(fc)==1:\n",
    "            return float('inf')\n",
    "        tropy=1\n",
    "        for t in fc:\n",
    "            p=fc[t]/nf\n",
    "            tropy+=p/(1-p)\n",
    "        return tropy\n",
    "    \n",
    "    def predict(self, docs, predModel=\"KNN\"):\n",
    "        \"\"\"Predict class labels for ``docs`` with the fitted LR model or the KNN rule.\n",
    "\n",
    "        docs: a list of documents, handed to ``self.vect.transform``.\n",
    "        predModel: \"LR\" predicts with ``self.lrmodel``; any other value takes the KNN path.\n",
    "        Returns the output of ``self.lrmodel.predict`` (LR) or the list built by\n",
    "        ``self.knnClassify`` with 7 neighbors (KNN).\n",
    "        The feature groups used are selected by ``self.usetf``, ``self.useBTF`` and\n",
    "        ``self.usebiasfea``.\n",
    "        \"\"\"\n",
    "        ## input docs format is required as a list of docs\n",
    "        tfy=self.vect.transform(docs)\n",
    "        tfy=normalize(tfy, norm='l1',axis=1)\n",
    "        ## binary term-presence features, optionally weighted by fle_diag\n",
    "        btfy=csr_matrix((tfy>0)*1)\n",
    "        if self.usewt4BTF:\n",
    "            btfy=btfy.dot(self.fle_diag)\n",
    "        btfy=normalize(btfy, norm='l2',axis=1)\n",
    "        if self.usebiasfea:\n",
    "            ## bias features are built from the l1-normalized tf, before the fidfs weighting\n",
    "            temp=tfy\n",
    "            if self.usebiaswt:\n",
    "                temp=tfy.dot(self.fle_diag)\n",
    "            biastfy=self.biastfxgenerator(temp, self.tcebiasmat)\n",
    "            biastfy=normalize(biastfy, norm='l2', axis=1)\n",
    "        if predModel==\"LR\":\n",
    "            tfy=tfy.dot(self.fidfs_diag)\n",
    "            tfy=normalize(tfy, norm='l2',axis=1)\n",
    "            ## stack the enabled feature groups in the order TF, BTF, bias\n",
    "            if self.useBTF:\n",
    "                if self.usetf:\n",
    "                    tfy=sp.hstack((tfy,btfy), format='csr')\n",
    "                else:\n",
    "                    tfy=btfy\n",
    "            if self.usebiasfea:\n",
    "                if self.usetf or self.useBTF:\n",
    "                    tfy=sp.hstack((tfy, biastfy), format='csr')\n",
    "                else:\n",
    "                    tfy=biastfy\n",
    "            plabs=self.lrmodel.predict(tfy)\n",
    "            return plabs\n",
    "        if predModel=='KNN':\n",
    "            print(\"knn predicting ...\")\n",
    "        ## KNN path: add up the pairwise distances of every enabled feature group\n",
    "        Dist=None\n",
    "        if self.useBTF:\n",
    "            byxdist=pdist.pairwise_distances(btfy, self.btfx)\n",
    "            Dist=byxdist\n",
    "        if self.usetf:\n",
    "            ## the dense test-vs-train TF distance matrix is only built when it is used\n",
    "            tfy=tfy.dot(self.fidfs_diag)\n",
    "            tfy=normalize(tfy, norm='l2',axis=1)\n",
    "            yxdist=pdist.pairwise_distances(tfy, self.tfx)\n",
    "            if Dist is not None:\n",
    "                Dist+=yxdist\n",
    "            else:\n",
    "                Dist=yxdist\n",
    "        if self.usebiasfea:\n",
    "            biasdist=pdist.pairwise_distances(biastfy, self.biastfx)\n",
    "            if Dist is not None:\n",
    "                Dist+=biasdist\n",
    "            else:\n",
    "                Dist=biasdist\n",
    "        plabs=self.knnClassify(self.ylabs, Dist, n_neighbors=7)\n",
    "        return plabs\n",
    "    \n",
    "    def evaluate(self, testdocs, testlabs, predModel=\"KNN\"):\n",
    "        \"\"\"\n",
    "        Return the test error rate, i.e. the fraction of test docs that are misclassified.\n",
    "        (1) testdocs are labelled through self.predict(testdocs, predModel).\n",
    "        (2) the predictions are compared position by position with testlabs.\n",
    "        \"\"\"\n",
    "        print(\"evaluating ... \")\n",
    "        predicted=self.predict(testdocs, predModel)\n",
    "        total=len(predicted)\n",
    "        ## number of positions where prediction and reference label agree\n",
    "        hits=sum(1 for idx in range(total) if predicted[idx]==testlabs[idx])\n",
    "        return 1 - hits/total\n",
    "    \n",
    "     \n",
    "    def KNNpredict(self, neighbor_classes, C, cross_val=False):\n",
    "        \"\"\"Majority vote among neighbor labels, shrinking the neighbor set while tied.\n",
    "\n",
    "        neighbor_classes: labels of the nearest neighbors, closest first.\n",
    "        C: number of classes; one extra vote per value in range(C) is appended so that\n",
    "           every class appears in the counts.\n",
    "        cross_val: accepted but not used.\n",
    "        Returns the argmax position in the per-label counts (labels sorted by np.unique).\n",
    "        On a tie for first place the last two neighbors are dropped and the vote is\n",
    "        repeated, as long as more than two neighbors remain.\n",
    "        \"\"\"\n",
    "        votes = neighbor_classes\n",
    "        while True:\n",
    "            # Pad with one vote per class, then count the votes per label\n",
    "            padded = np.concatenate((votes, list(range(C))))\n",
    "            _, counts = np.unique(padded, return_counts=True)\n",
    "            winner = np.argmax(counts)\n",
    "            tied = sum(counts == counts[winner]) > 1\n",
    "            # Stop on a unique winner, or when too few neighbors are left to drop two more\n",
    "            if not tied or len(votes) <= 2:\n",
    "                return winner\n",
    "            votes = votes[:-2]\n",
    "    \n",
    "    \n",
    "    def knnClassify(self, y_train, dist, n_neighbors=7):\n",
    "        \"\"\"Label every test document by a vote among its closest training documents.\n",
    "\n",
    "        y_train: training docs labels, indexed with an array of positions.\n",
    "        dist: testdocs vs traindocs pairwise distance matrix, one row per test document.\n",
    "        n_neighbors: how many of the closest training docs take part in the vote.\n",
    "        Returns a list with one self.KNNpredict result per row of dist.\n",
    "        \"\"\"\n",
    "        return [\n",
    "            # labels of the n_neighbors smallest distances in this row go to the vote\n",
    "            self.KNNpredict(y_train[np.argsort(dist[row,:])[:n_neighbors]], self.n_classes)\n",
    "            for row in range(dist.shape[0])\n",
    "        ]\n",
    "    \n",
    "    def biastfxgenerator(self, tfx, biasmat):\n",
    "        \"\"\"\n",
    "        input: (1) tfx of size nxm as tf of docs;\n",
    "               (2) biasmat of size n_classes x m, each row holds the per-feature bias of one class\n",
    "        output:(1) a csr matrix of size nx(n_classes*m): for every class a copy of tfx whose columns\n",
    "                   are scaled by that class's bias row, stacked side by side in class order\n",
    "        raises: ValueError when tfx and biasmat disagree on the number of features m\n",
    "        \"\"\"\n",
    "        n,m=tfx.shape\n",
    "        print(f\"tfx dim: {n,m}\")\n",
    "        k,m2=biasmat.shape\n",
    "        print(f\"biasmat dim:{k,m2}\")\n",
    "        if m!=m2:\n",
    "            ## stop here with both sizes instead of printing and then failing inside dot()\n",
    "            raise ValueError(f\"dim does not match! tfx has {m} features, biasmat has {m2}\")\n",
    "        ## scale once per class and stack a single time; stacking inside the loop\n",
    "        ## re-copied the growing matrix on every iteration\n",
    "        blocks=[tfx.dot(diags(biasmat[i,:])) for i in range(k)]\n",
    "        return sp.hstack(blocks, format='csr')\n",
    "    \n",
    "    \n",
    "    def entropy2(self, p):\n",
    "        \"\"\"Binary entropy ``-p*log(p) - (1-p)*log(1-p)`` with the natural logarithm.\n",
    "\n",
    "        p: a probability, given as a scalar or a numpy array.\n",
    "        Returns 0 for every value that is not strictly inside (0, 1); this is the limit of\n",
    "        the formula at p=0 and p=1, where the plain expression evaluates to nan.\n",
    "        \"\"\"\n",
    "        q=1.0-p\n",
    "        inside=(p>0)&(p<1)\n",
    "        ## log() is still evaluated for the masked-out entries, so silence its warnings\n",
    "        with np.errstate(divide='ignore', invalid='ignore'):\n",
    "            entpy=np.where(inside, -p*np.log(p)-q*np.log(q), 0.0)\n",
    "        ## [()] turns a 0-d result back into a numpy scalar and leaves arrays as they are\n",
    "        return entpy[()]\n",
    "    \n",
    "    def pentropy(self, plist):\n",
    "        \"\"\"Return the sum of ``-p*log(eps + 1 - p)`` over the probabilities in ``plist``.\n",
    "\n",
    "        plist: a prob list, e.g, [0.1,0.3,0.6]\n",
    "        eps (0.001) keeps the logarithm finite when an entry equals 1.\n",
    "        An empty ``plist`` returns 0.\n",
    "        \"\"\"\n",
    "        ## troenpy\n",
    "        eps=0.001\n",
    "        res=0\n",
    "        for p in plist:\n",
    "            q=1.0-p\n",
    "            ## accumulate every term; a plain assignment kept only the last one\n",
    "            res+=-p*np.log(eps+q)\n",
    "        return res\n",
    "    \n",
    "    def recadd(self):\n",
    "        \"\"\"Raise ``self.nnn`` in steps of 2 until it exceeds 10, printing every value seen.\"\"\"\n",
    "        while self.nnn<=10:\n",
    "            print(f\"nnn={self.nnn}\")\n",
    "            self.nnn+=2\n",
    "        ## the value that ended the loop is printed with spaces around '='\n",
    "        print(f\"nnn = {self.nnn}\")\n",
    "        return"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "39b6433f",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
