import json

# --- Configuration ----------------------------------------------------------
# Path to the JSON file to evaluate. It can also be supplied as the first
# command-line argument, which takes precedence over this value.
input_file = ""
# Name of the per-entry sub-dict to read the score from (val[detector]).
detector = "binoculars_cxt"
# Key inside that sub-dict that holds the numeric score (val[detector][field]).
field = "prob"
# A score strictly below this value yields predicted label 1, otherwise 0.
threshold = 0.8125

# Bucket that collects entries whose 'category' is 'human'.
HUMAN_LABEL = "/reviews/"
# Fixed bucket order; it drives the order of the printed lines and LaTeX cells.
LABELS = ("level1", "level2", "level3", "level4", HUMAN_LABEL)


def collect_predictions(data, detector, field, threshold):
    """Group binary predictions of the test entries by their true label.

    Args:
        data: Mapping of entry key -> entry dict. Each entry dict must hold
            a 'category' value and a ``entry[detector][field]`` score.
        detector: Key of the detector sub-dict inside each entry.
        field: Key of the score inside the detector sub-dict.
        threshold: Scores strictly below it are predicted as 1, else 0.

    Returns:
        Dict mapping every label in ``LABELS`` (in that order) to the list
        of 0/1 predictions made for entries carrying that label.

    Raises:
        KeyError: If an entry lacks the detector/field/category keys, or its
            category is neither 'human' nor one of ``LABELS``.
    """
    results = {label: [] for label in LABELS}

    for key, val in data.items():
        # Optional filters, kept disabled as in the original script:
        # if '/reviews/' not in key:
        #     continue

        # Only entries whose key contains '/test/' are evaluated.
        if '/test/' not in key:
            continue

        # if 'neurips' in key:
        #     continue

        score = val[detector][field]

        category = val['category']
        true_label = HUMAN_LABEL if category == 'human' else category

        if true_label not in results:
            # Same exception type as the plain dict lookup would raise, but
            # naming the offending entry so bad data is easy to locate.
            raise KeyError(
                f"unexpected category {true_label!r} for entry {key!r}"
            )

        results[true_label].append(1 if score < threshold else 0)

    return results


def positive_rate(preds):
    """Return the percentage of 1s in *preds*, or None when it is empty.

    Returning None instead of dividing avoids a ZeroDivisionError for
    buckets that received no entries.
    """
    if not preds:
        return None
    return sum(preds) * 100 / len(preds)


def format_summary(results):
    """Return one human-readable line per label: '<label>: <rate>% of <n>'.

    Empty buckets are reported as '<label>: n/a (no samples)'.
    """
    lines = []
    for label, preds in results.items():
        rate = positive_rate(preds)
        if rate is None:
            lines.append(f"{label}: n/a (no samples)")
        else:
            lines.append(f"{label}: {rate}% of {len(preds)}")
    return lines


def format_latex_row(results):
    """Return the rates as LaTeX table cells, each followed by ' & '.

    Every non-empty bucket becomes '$<rate to one decimal>$'; an empty
    bucket becomes '--'. The trailing ' & ' after the last cell is kept so
    the output matches what the script has always printed.
    """
    cells = []
    for preds in results.values():
        rate = positive_rate(preds)
        cells.append("--" if rate is None else f"${rate:.1f}$")
    return "".join(f"{cell} & " for cell in cells)


def main(path=None):
    """Load the JSON file, then print the threshold, summary and LaTeX row.

    Args:
        path: JSON file to read; defaults to the module-level ``input_file``.
    """
    if path is None:
        path = input_file

    # Explicit UTF-8 keeps decoding independent of the platform locale.
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)

    print(f"Using threshold: {threshold}")

    results_dict = collect_predictions(data, detector, field, threshold)

    for line in format_summary(results_dict):
        print(line)

    print(format_latex_row(results_dict))


if __name__ == "__main__":
    import sys

    # An optional first argument overrides the hard-coded input_file.
    main(sys.argv[1] if len(sys.argv) > 1 else input_file)

'''
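Results from Jan 28, using consistent input files. Each line below has the
form `<label>: <percentage of samples predicted as 1>% of <number of samples>`,
as printed by the script above: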

LOGLIKELIHOOD:
Using threshold: -1.505952380952381

OLD DATA TEST ONLY:
level1: 97.81021897810218% of 274
level2: 96.35036496350365% of 274
level3: 72.43589743589743% of 936
level4: 0.42735042735042733% of 936

NEW DATA TEST ONLY:
level1: 46.04938271604938% of 810
level2: 40.90909090909091% of 660
level3: 30.5% of 2200
level4: 17.666666666666668% of 2700

/reviews/: 0.21367521367521367% of 468

LOGLIKELIHOOD_CXT:
Using threshold: -1.7861842105263157

OLD DATA TEST ONLY:
level1: 98.9051094890511% of 274
level2: 99.63503649635037% of 274
level3: 91.55982905982906% of 936
level4: 1.6025641025641026% of 936

NEW DATA TEST ONLY:
level1: 59.75308641975309% of 810
level2: 50.45454545454545% of 660
level3: 43.68181818181818% of 2200
level4: 27.814814814814813% of 2700

/reviews/: 0.42735042735042733% of 468

FAST_DETECT_GPT:
Using threshold: 0.9976572848262414

OLD DATA TEST ONLY:
level1: 100.0% of 274
level2: 100.0% of 274
level3: 97.54273504273505% of 936
level4: 3.5256410256410255% of 936

NEW DATA TEST ONLY:
level1: 72.09876543209876% of 810
level2: 68.18181818181819% of 660
level3: 63.13636363636363% of 2200
level4: 42.333333333333336% of 2700

/reviews/: 0.21367521367521367% of 468

FAST_DETECT_GPT_CXT:
Using threshold: 0.9808312792751944

OLD DATA TEST ONLY:
level1: 100.0% of 274
level2: 100.0% of 274
level3: 99.25213675213675% of 936
level4: 8.974358974358974% of 936

NEW DATA TEST ONLY:
level1: 75.30864197530865% of 810
level2: 73.93939393939394% of 660
level3: 68.22727272727273% of 2200
level4: 49.148148148148145% of 2700

/reviews/: 0.8547008547008547% of 468

BINOCULARS:
Using threshold: 0.7616279125213623

OLD DATA TEST ONLY:
level1: 51.09489051094891% of 274
level2: 50.72992700729927% of 274
level3: 39.636752136752136% of 936
level4: 0.0% of 936

NEW DATA TEST ONLY:
level1: 21.604938271604937% of 810
level2: 22.272727272727273% of 660
level3: 14.136363636363637% of 2200
level4: 6.814814814814815% of 2700

/reviews/: 0.0% of 468

BINOCULARS_CXT:
Using threshold: 0.8125

OLD DATA TEST ONLY:
level1: 50.72992700729927% of 274
level2: 53.284671532846716% of 274
level3: 33.65384615384615% of 936
level4: 0.0% of 936

NEW DATA TEST ONLY:
level1: 20.493827160493826% of 810
level2: 20.0% of 660
level3: 14.272727272727273% of 2200
level4: 6.481481481481482% of 2700

/reviews/: 0.0% of 468
'''