import jsonlines
import argparse
import csv
import numpy as np


def _clarification_tasks(records):
    """Return the set of ``task`` values whose answer mentions "clarification".

    Each record is indexed by its ``task`` and ``answer`` keys; the match is a
    case-insensitive substring test on ``answer``.
    """
    return {
        record['task']
        for record in records
        if 'clarification' in record['answer'].lower()
    }


def _format_overlap(shared, total):
    """Format ``shared / total`` as a percentage, or ``n/a`` when ``total`` is 0."""
    if not total:
        # Avoid ZeroDivisionError when a file has no clarification answers.
        return 'n/a'
    return f"{shared / total * 100:.2f}%"


def main():
    """Compare which tasks received a clarification answer in two JSONL files.

    Reads the files given by ``--input_path_1`` and ``--input_path_2``,
    collects the ``task`` of every record whose ``answer`` contains
    "clarification" (case-insensitive), and prints, in order:

    1. the number of tasks flagged in both files,
    2. the number of tasks flagged in at least one file,
    3. the shared tasks as a percentage of the tasks flagged in file 1,
    4. the shared tasks as a percentage of the tasks flagged in file 2.

    A percentage is reported as ``n/a`` when the corresponding file has no
    flagged tasks.
    """
    parser = argparse.ArgumentParser()
    # Both paths are mandatory: without them there is nothing to open.
    parser.add_argument('--input_path_1', type=str, required=True)
    parser.add_argument('--input_path_2', type=str, required=True)
    args = parser.parse_args()

    with jsonlines.open(args.input_path_1) as reader:
        cls_1 = _clarification_tasks(reader)
    with jsonlines.open(args.input_path_2) as reader:
        cls_2 = _clarification_tasks(reader)

    cls_both = cls_1 & cls_2
    cls_all = cls_1 | cls_2
    print(len(cls_both))
    print(len(cls_all))
    print(f"overlap 1: {_format_overlap(len(cls_both), len(cls_1))}")
    print(f"overlap 2: {_format_overlap(len(cls_both), len(cls_2))}")


# Run the comparison only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
