#!/usr/bin/env python3
import sys
import copy
import argparse

# Command-line options:
#   --index      when >= 0, appended as '.<index>' to both data file names
#   --criterion  decision criterion, either 'maximin' or 'maximax'
parser = argparse.ArgumentParser()
parser.add_argument("--index", type=int, default=-1)
parser.add_argument("--criterion", type=str, default='')
args = parser.parse_args()

train_data = 'DataminingContest2009.Task2.Train.Inputs.short.short.train'
test_data = 'DataminingContest2009.Task2.Train.Inputs.short.short.test.expanded'
if args.index >= 0:
    # Both files of a numbered split share the same '.<index>' suffix.
    suffix = '.' + str(args.index)
    train_data += suffix
    test_data += suffix

# An unknown or missing criterion is reported (usage message, exit status 2)
# instead of ending silently with a success status.
if args.criterion not in ('maximin', 'maximax'):
    parser.error("--criterion must be 'maximin' or 'maximax', got %r"
                 % args.criterion)
maximin = (args.criterion == 'maximin')

# Read the training rows; the first line of the file is skipped.
with open(train_data, 'r') as f:
    lines = f.readlines()

# A file without data rows cannot be used; stop with a clear message instead
# of failing later with a ZeroDivisionError.
size = float(len(lines) - 1)
if size <= 0:
    sys.exit('no training rows found in ' + train_data)

# Group the raw rows by the 0/1 value in column 15 and count each group.
t = [0] * 2
liness = [[], []]
for line in lines[1:]:
    v = int(line.split(',')[15])
    t[v] += 1
    liness[v].append(line)

# Fraction of training rows falling in each group.
prior = [count / size for count in t]

# Known values of column 14, in the order of their slots in its frequency table.
buf = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 21, 23, 1434, 1600, 3278, 977, 983, 100, 1643, 364, 373, 118, 119, 98]
# Maps each known column-14 value to its slot.
f5map = {value: slot for slot, value in enumerate(buf)}


def _bin_index(value, upper_bounds):
    """Return the index of the first bound that *value* does not exceed.

    When *value* is greater than every bound, ``len(upper_bounds)`` is
    returned, i.e. the index of the open-ended last bin.
    """
    for index, bound in enumerate(upper_bounds):
        if value <= bound:
            return index
    return len(upper_bounds)


def calc_cond(lines):
    """Compute per-feature relative frequencies over comma-separated rows.

    Args:
        lines: list of row strings. Columns 0, 3 and 5-14 of each row are
            read; the integer columns are used directly as table indices,
            so their values must fit the table sizes below.

    Returns:
        A list of 12 frequency tables (lists of floats), in this order:
        column 0 (zero / non-zero), column 3 (5 slots), column 5, column 6,
        column 7 (5 bins), column 8 (5 bins), columns 9-13 (2 slots each)
        and column 14 (one slot per entry of ``buf``). Every table holds
        counts divided by ``len(lines)``; for an empty ``lines`` all tables
        are filled with 0.0 rather than raising ZeroDivisionError.

    Raises:
        KeyError: if a column-14 value is not listed in ``buf``.
        IndexError, ValueError: if a row is too short or not numeric.
    """
    counts = [
        [0] * 2,         # column 0: slot 0 for a zero value, slot 1 otherwise
        [0] * 5,         # column 3
        [0] * 2,         # column 5
        [0] * 2,         # column 6
        [0] * 5,         # column 7, binned
        [0] * 5,         # column 8, binned
        [0] * 2,         # column 9
        [0] * 2,         # column 10
        [0] * 2,         # column 11
        [0] * 2,         # column 12
        [0] * 2,         # column 13
        [0] * len(buf),  # column 14, via f5map
    ]
    for line in lines:
        words = line.split(',')
        counts[0][0 if float(words[0]) == 0 else 1] += 1
        counts[1][int(words[3])] += 1
        counts[2][int(words[5])] += 1
        counts[3][int(words[6])] += 1
        counts[4][_bin_index(float(words[7]), (-5000, 0, 2000, 5000))] += 1
        counts[5][_bin_index(float(words[8]), (8, 10, 15, 20))] += 1
        counts[6][int(words[9])] += 1
        counts[7][int(words[10])] += 1
        counts[8][int(words[11])] += 1
        counts[9][int(words[12])] += 1
        counts[10][int(words[13])] += 1
        counts[11][f5map[int(words[14])]] += 1

    if not lines:
        # No rows: there is nothing to normalise by, report zero frequencies.
        return [[0.0] * len(table) for table in counts]
    size = float(len(lines))
    return [[count / size for count in table] for table in counts]

# Feature frequency tables for each group of training rows:
# cond0 from liness[0], cond1 from liness[1].
cond0, cond1 = [calc_cond(group_rows) for group_rows in liness]

# Read three (lower, upper) pairs from lines 2-4 of 'bounds.input'; the first
# line of the file is skipped.
with open('bounds.input', 'r') as f:
    lines = f.readlines()
if len(lines) < 4:
    sys.exit("bounds.input must have at least 4 lines: "
             "one skipped line followed by three 'lower upper' lines")
prLB = []
prUB = []
for line in lines[1:4]:
    # split() without arguments tolerates tabs and repeated spaces.
    words = line.split()
    prLB.append(float(words[0]))
    prUB.append(float(words[1]))

# Read the evaluation rows; the first line of the file is skipped.
with open(test_data, 'r') as f:
    lines = f.readlines()


def _feature_slots(words):
    """Return the table slot of each feature for one split row.

    The slots are listed in the same order as the tables returned by
    calc_cond and use the same rules: column 0 maps to slot 0 for a zero
    value and slot 1 otherwise, columns 7 and 8 are binned, column 14 goes
    through f5map, and the remaining columns are used as integers.
    """
    slots = [
        # Slot 0 for a zero value, matching how calc_cond counts column 0.
        0 if float(words[0]) == 0 else 1,
        int(words[3]),
        int(words[5]),
        int(words[6]),
    ]
    col7 = float(words[7])
    if col7 <= -5000:
        slots.append(0)
    elif col7 <= 0:
        slots.append(1)
    elif col7 <= 2000:
        slots.append(2)
    elif col7 <= 5000:
        slots.append(3)
    else:
        slots.append(4)
    col8 = float(words[8])
    if col8 <= 8:
        slots.append(0)
    elif col8 <= 10:
        slots.append(1)
    elif col8 <= 15:
        slots.append(2)
    elif col8 <= 20:
        slots.append(3)
    else:
        slots.append(4)
    slots.extend(int(words[column]) for column in (9, 10, 11, 12, 13))
    slots.append(f5map[int(words[14])])
    return slots


acc = 0    # rows whose prediction equals column 15
tp = 0     # predicted 1, column 15 is 1
fp = 0     # predicted 1, column 15 is 0
fcnt = 0   # rows whose column 15 is 1
for line in lines[1:]:
    words = line.split(',')
    flags = [int(words[16]), int(words[17]), int(words[18])]
    if not any(flags):
        # NOTE: with maximin both scores start at 0 and stay 0, so such a
        # row is always predicted 1 (0 >= 0).
        p = [0., 0.] if maximin else [1., 1.]
    else:
        # Multiply the bounds of every flag that is set in columns 16-18.
        pL = 1.
        pU = 1.
        for flag, lower, upper in zip(flags, prLB, prUB):
            if flag:
                pL *= lower
                pU *= upper
        p = [pL, 1. - pU] if maximin else [pU, 1. - pL]

    # Multiply in the per-feature frequencies of both groups.
    for table0, table1, slot in zip(cond0, cond1, _feature_slots(words)):
        p[0] *= table0[slot]
        p[1] *= table1[slot]

    pred = (p[1] >= p[0])
    v = int(words[15])
    if pred == v:
        acc += 1
    fcnt += v
    if pred:
        if v:
            tp += 1
        else:
            fp += 1

# Each ratio falls back to 0.0 when its denominator is zero (no rows, no
# positive predictions, no positive rows) instead of raising.
total = len(lines) - 1
print('accuracy', float(acc) / total if total > 0 else 0.0)
precision = float(tp) / (tp + fp) if (tp + fp) else 0.0
print('precision', precision)
recall = float(tp) / fcnt if fcnt else 0.0
print('recall', recall)
if precision + recall:
    print('F1', 2. * precision * recall / (precision + recall))
else:
    print('F1', 0.0)
print('tp', tp)
print('fp', fp)
print('fcnt', fcnt)

