import json
import re

import matplotlib.pyplot as plt

# Location of the JSON Lines dataset: one JSON object (sample) per line.
samples_path ='logicalDatasets/checked_dataset/related_word_symbolic_dataset.jsonl'

# Decode explicitly as UTF-8 so the result does not depend on the platform's
# default locale encoding, and skip blank lines (e.g. a trailing newline at
# the end of the file), which json.loads would otherwise reject.
with open(samples_path, 'r', encoding='utf-8') as f:
    samples = [json.loads(line) for line in f if line.strip()]
print('Number of samples:', len(samples))


# Dataset-wide totals of the per-sample 'T' / 'F' / 'M' counters.
# NOTE(review): presumably true / false / "maybe" query labels -- confirm.
queries_count = {'T':0, 'F':0, 'M':0}
# Total number of queries over all samples.
total = 0
total_sample = len(samples)
# For each operator, the number of SAMPLES that have at least one rule using
# it (a sample is counted at most once per operator).
op_count = {'StrongNegation':0, 'DefaultNegation':0, 'Disjunction':0, 'Constraint':0}
facts_count = 0
rules_count = 0
# Per-sample series, one entry per sample, used for the plots and max/min.
facts_num = []
rules_num = []
T_num = []
F_num = []
M_num = []
labels_num = []
p_num = []
c_num = []
l_num = []

# Textual detectors, one per operator; each takes a rule string and returns
# a truthy value when the operator occurs in it.
#  - StrongNegation: a '-' that is NOT the second character of the ':-'
#    rule operator (a plain "'-' in rule" test matches every ':-').
#  - DefaultNegation: 'not' as a whole word, so predicate names that merely
#    contain the letters "not" are not counted.
_OP_DETECTORS = {
    'StrongNegation': re.compile(r'(?<!:)-').search,
    'DefaultNegation': re.compile(r'\bnot\b').search,
    'Disjunction': lambda rule: '|' in rule,
    'Constraint': lambda rule: rule.startswith(':-'),
}

for sample in samples:
    queries_count['T'] += sample['T']
    queries_count['F'] += sample['F']
    queries_count['M'] += sample['M']
    total += len(sample['queries'])

    facts_count += len(sample['facts'])
    facts_num.append(len(sample['facts']))
    rules_count += len(sample['rules'])
    rules_num.append(len(sample['rules']))
    T_num.append(sample['T'])
    F_num.append(sample['F'])
    M_num.append(sample['M'])
    p_num.append(sample['max_pnum'])
    c_num.append(sample['max_cnum'])
    labels_num.append(sample['T'] + sample['F'] + sample['M'])

    # Count this sample once for every operator that appears in any rule.
    for op_name, detect in _OP_DETECTORS.items():
        if any(detect(rule) for rule in sample['rules']):
            op_count[op_name] += 1

    # Length of the longest rule, measured in single-space-separated tokens.
    # default=0 keeps a sample without rules from raising ValueError.
    lens = [len(rule.split(' ')) for rule in sample['rules']]
    l_num.append(max(lens, default=0))

# Print the T / F / M counts and their (scaled) proportions.
print('Total queries:', total)
print(
    'T:', queries_count['T'],
    'F:', queries_count['F'],
    'M:', queries_count['M'],
)
# NOTE(review): each share of `total` is multiplied by 3 before printing;
# the reason for that factor is not visible here -- confirm it is intended.
scaled_share = {tag: queries_count[tag] / total * 3 for tag in ('T', 'F', 'M')}
print(f'T:{scaled_share["T"]:.1f}, F:{scaled_share["F"]:.1f}, M:{scaled_share["M"]:.1f}')

# Operator usage: raw sample counts, then the same as a percentage of samples.
print('op_count:', op_count)
op_percent = {}
for op_name, hits in op_count.items():
    op_percent[op_name] = f'{hits/total_sample*100:.1f}'
print('op_count_p:', op_percent)

# Remaining per-sample averages and extremes, printed in a fixed order.
summary_rows = [
    ('avg facts_count:', facts_count / total_sample),
    ('max facts', max(facts_num)),
    ('avg rules_count:', rules_count / total_sample),
    ('max rules', max(rules_num)),
    ('avg labels_count:', total / total_sample),
    ('avg l_num:', sum(l_num) / total_sample),
    ('max labels_count:', max(labels_num)),
    ('min labels_count:', min(labels_num)),
]
for caption, value in summary_rows:
    print(caption, value)


plt.figure(figsize=(10, 6))

# Each per-sample series paired with the name shown on its axis tick.
# The order is reversed before plotting, so 'c' becomes the first box and
# 'Facts' the last one.
named_series = [
    ('Facts', facts_num),
    ('Rules', rules_num),
    ('Labels', labels_num),
    ('T', T_num),
    ('F', F_num),
    ('M', M_num),
    ('p', p_num),
    ('c', c_num),
]
data = [series for _, series in reversed(named_series)]

plt.rc('font', size=20)
labels = [name for name, _ in reversed(named_series)]

# Horizontal box plot (vert=False) with one labelled box per series.
plt.boxplot(data, vert=False, labels=labels)
# Title and axis label.
plt.title('Box Plot with Individual Data Points')
plt.xlabel('Value')
# Display the figure.
plt.show()

#####################################
# One five-bin histogram per series, each in its own figure and shown
# before the next one is created: p_num, then c_num, then l_num.
for hist_title, hist_values in (('pnum', p_num), ('cnum', c_num), ('l_num', l_num)):
    plt.figure(figsize=(10, 6))
    plt.hist(hist_values, bins=5, color='skyblue', alpha=0.7, edgecolor='black', linewidth=1)

    # Title and axis labels.
    plt.title(hist_title)
    plt.xlabel('Value')
    plt.ylabel('Frequency')

    # Display the figure.
    plt.show()