import json
import os

def _encode_preference(preference):
    """Return 0 if *preference* contains 'a', 1 if it contains 'b', else 2."""
    if 'a' in preference:
        return 0
    if 'b' in preference:
        return 1
    return 2


def _agreement_rate(first_pass, second_pass):
    """Return the fraction of positions where both encoded passes are equal.

    An empty pass yields 0.0 instead of raising ZeroDivisionError.
    """
    if not first_pass:
        return 0.0
    agreed = sum(1 for x, y in zip(first_pass, second_pass) if x == y)
    return agreed / len(first_pass)


def _suffixed_path(input_path, suffix):
    """Insert *suffix* between the root and the extension of *input_path*."""
    # splitext only touches the final path component, so the result can never
    # equal input_path and directory names containing '.json' stay intact.
    root, ext = os.path.splitext(input_path)
    return f'{root}{suffix}{ext}'


def compare_annotation(input_path):
    """Report agreement between two annotation passes and dump the split.

    The JSON file at *input_path* must hold a list of records. Each record
    stores a first-pass preference under a comparison key (for example
    'model_answer_a vs model_answer_b') and a second-pass preference under
    the same key with a '_new' suffix. Every preference is reduced to three
    classes: contains 'a', contains 'b', or neither.

    When *input_path* is exactly 'geoen.json', 'polen.json' or 'hisen.json',
    four comparisons are evaluated (human vs human, human vs model a,
    human vs model b, model vs model); for any other path only the
    model-vs-model comparison is evaluated.

    Side effects:
        * prints one agreement ratio per reported category (the two
          human-vs-model ratios are averaged into a single line);
        * writes '<root>_mismatches<ext>' and '<root>_matches<ext>' next to
          the input, each a JSON list of entries with 'id' (record index),
          'reference' and the two answers that were compared.

    Args:
        input_path: path of the annotation JSON file.

    Raises:
        KeyError: if a record lacks one of the expected keys.
    """
    # (comparison key, {output field: record field}) for each comparison.
    human_vs_human = (
        'student_answer_a vs student_answer_b',
        {'stud_answer_a': 'refined_student_answer_a',
         'stud_answer_b': 'refined_student_answer_b'},
    )
    human_vs_model_a = (
        'stud_answer_a vs model_answer_a',
        {'stud_answer_a': 'refined_student_answer_a',
         'model_answer_a': 'model_answer_a'},
    )
    human_vs_model_b = (
        'stud_answer_a vs model_answer_b',
        {'stud_answer_a': 'refined_student_answer_a',
         'model_answer_b': 'model_answer_b'},
    )
    model_vs_model = (
        'model_answer_a vs model_answer_b',
        {'model_answer_a': 'model_answer_a',
         'model_answer_b': 'model_answer_b'},
    )

    with open(input_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    has_human_answers = input_path in ['geoen.json', 'polen.json', 'hisen.json']
    if has_human_answers:
        comparisons = [human_vs_human, human_vs_model_a,
                       human_vs_model_b, model_vs_model]
    else:
        comparisons = [model_vs_model]

    # One (first pass, second pass) pair of encoded lists per comparison.
    encoded = []
    for key, _ in comparisons:
        first_pass = [_encode_preference(d[key]) for d in data]
        second_pass = [_encode_preference(d[key + '_new']) for d in data]
        encoded.append((first_pass, second_pass))

    rates = [_agreement_rate(first, second) for first, second in encoded]
    if has_human_answers:
        rate_hh, rate_hma, rate_hmb, _ = rates
        print(f'Agreement of two annotators on human vs human: {rate_hh}')
        print(f'Agreement of two annotators on human vs model: {(rate_hma + rate_hmb) / 2}')
    # The model-vs-model comparison is always the last one in the list.
    print(f'Agreement of two annotators on model vs model: {rates[-1]}')

    # Separate agreements from disagreements, comparison by comparison, so
    # the output order is: all records of comparison 1, then comparison 2, ...
    matches = []
    mismatches = []
    for (_, fields), (first_pass, second_pass) in zip(comparisons, encoded):
        for i, (pref_a, pref_b) in enumerate(zip(first_pass, second_pass)):
            entry = {'id': i, 'reference': data[i]['Concise_Reference']}
            for out_field, record_field in fields.items():
                entry[out_field] = data[i][record_field]
            if pref_a != pref_b:
                mismatches.append(entry)
            else:
                matches.append(entry)

    for suffix, entries in (('_mismatches', mismatches), ('_matches', matches)):
        output_path = _suffixed_path(input_path, suffix)
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(entries, f, ensure_ascii=False, indent=4)
            
def combine_annotation(input_path, anno_path):
    """Copy both annotation passes from *anno_path* into the records of *input_path*.

    Both files must contain JSON lists; they are paired positionally with
    ``zip``, so surplus entries in the longer list receive no decision keys.
    The merged records are written to '<root>_combined<ext>' next to
    *input_path*; the input file itself is never modified.

    When *anno_path* is exactly 'geoen.json', 'polen.json' or 'hisen.json',
    eight keys are added per record: ``decided_1_*`` from the plain
    comparison keys and ``decided_2_*`` from their '_new' counterparts, for
    the four comparisons human_a/human_b, human_a/model_a, human_a/model_b
    and model_a/model_b. The stored value is the first option's label when
    the annotation contains 'a', otherwise the second option's label.

    For any other *anno_path* only ``decided_1_model_a_vs_model_b`` and
    ``decided_2_model_a_vs_model_b`` are added, holding the raw annotation
    values unchanged.

    Args:
        input_path: JSON file with the records to enrich.
        anno_path: JSON file with the annotations.

    Raises:
        KeyError: if an annotation entry lacks one of the expected keys.
    """
    with open(anno_path, 'r', encoding='utf-8') as f:
        annotations = json.load(f)

    with open(input_path, 'r', encoding='utf-8') as f:
        records = json.load(f)

    if anno_path in ['geoen.json', 'polen.json', 'hisen.json']:
        # (decision name, annotation key, label if 'a' is present, label otherwise)
        decisions = (
            ('human_a_vs_human_b', 'student_answer_a vs student_answer_b', 'human_a', 'human_b'),
            ('human_a_vs_model_a', 'stud_answer_a vs model_answer_a', 'human_a', 'model_a'),
            ('human_a_vs_model_b', 'stud_answer_a vs model_answer_b', 'human_a', 'model_b'),
            ('model_a_vs_model_b', 'model_answer_a vs model_answer_b', 'model_a', 'model_b'),
        )
        for record, annotation in zip(records, annotations):
            # Pass 1 reads the plain keys, pass 2 the '_new' keys; all pass-1
            # decisions are inserted before the pass-2 ones.
            for pass_no, key_suffix in ((1, ''), (2, '_new')):
                for name, key, first_label, second_label in decisions:
                    # NOTE(review): any value without 'a' maps to the second
                    # label, including values that contain neither 'a' nor 'b'.
                    chose_first = 'a' in annotation[key + key_suffix]
                    record[f'decided_{pass_no}_{name}'] = first_label if chose_first else second_label
    else:
        for record, annotation in zip(records, annotations):
            record['decided_1_model_a_vs_model_b'] = annotation['model_answer_a vs model_answer_b']
            record['decided_2_model_a_vs_model_b'] = annotation['model_answer_a vs model_answer_b_new']

    # splitext only touches the final path component, so the output path can
    # never equal input_path (str.replace left it unchanged when '.json' was
    # absent, which overwrote the input file).
    root, ext = os.path.splitext(input_path)
    output_path = f'{root}_combined{ext}'
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(records, f, ensure_ascii=False, indent=4)
            
# Exact-match agreement between the first and second ('_new') annotation
# passes stored in hisen.json, printed as three ratios.
with open('hisen.json', 'r', encoding='utf-8') as f:
    hisen = json.load(f)

# (first-pass key, second-pass key) for each of the four comparisons.
key_pairs = (
    ('student_answer_a vs student_answer_b', 'student_answer_a vs student_answer_b_new'),
    ('stud_answer_a vs model_answer_a', 'stud_answer_a vs model_answer_a_new'),
    ('stud_answer_a vs model_answer_b', 'stud_answer_a vs model_answer_b_new'),
    ('model_answer_a vs model_answer_b', 'model_answer_a vs model_answer_b_new'),
)

# hits[k] counts the records whose two passes are identical for key_pairs[k].
hits = [0, 0, 0, 0]
for data in hisen:
    for slot, (first_key, second_key) in enumerate(key_pairs):
        if data[first_key] == data[second_key]:
            hits[slot] += 1
acc_1, acc_2, acc_3, acc_4 = hits

total = len(hisen)
print(acc_1 / total)
# Both human-vs-model comparisons are pooled, hence twice the denominator.
print((acc_2 + acc_3) / (total * 2))
print(acc_4 / total)
