import argparse
import json
import os
import re

import jsonlines

from common.utils import extract_between, extract_after


def main():
    """Filter model responses into unclear-task records and write them out.

    Reads the JSON Lines file given by ``--input_file``. For each item, the
    text extracted from ``item['response']`` between the markers
    ``'Unclear Question'`` and ``'Reason of Unclearness'`` becomes the
    ``unclear_task`` and the text extracted after ``'Reason of Unclearness'``
    becomes the reason. Items are dropped when either extraction yields
    ``None`` or when the extracted task contains the word "unclear"
    (case-insensitive); dropped tasks of the second kind are printed.

    The surviving records are written twice:

    * as JSON Lines to ``<input path without extension>_pp.jsonl``;
    * as one JSON array to ``json_files/<that same path>`` (the ``.jsonl``
      suffix is kept on this file as well).

    The number of kept records is printed before writing.

    Raises:
        KeyError: If an item has no ``response`` key, or a kept item is
            missing ``metadata`` or one of the metadata fields copied into
            the record (``raw_task``, ``solution``, ``answer``, ``level``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--input_file', type=str, required=True)
    args = parser.parse_args()

    output_datas = []
    with jsonlines.open(args.input_file) as reader:
        for item in reader:
            response = item['response']
            # NOTE(review): extract_between / extract_after live in
            # common.utils; this code only relies on them returning None
            # when nothing could be extracted — confirm against the helpers.
            unclear_task = extract_between(
                response, 'Unclear Question', 'Reason of Unclearness'
            )
            reason = extract_after(response, 'Reason of Unclearness')
            if unclear_task is None or reason is None:
                continue
            if 'unclear' in unclear_task.lower():
                # Rejected tasks are echoed so they can be inspected by hand.
                print(unclear_task)
                continue
            metadata = item['metadata']
            output_datas.append({
                'unclear_task': unclear_task,
                'raw_task': metadata['raw_task'],
                'reason_of_unclearness': reason,
                'solution': metadata['solution'],
                'answer': metadata['answer'],
                'level': metadata['level'],
                'metadata': metadata,
            })
    print(len(output_datas))

    # splitext only strips a real trailing extension, so inputs without one
    # (e.g. "data" or "./runs/data") keep their full path as the stem instead
    # of collapsing to an empty or truncated name.
    file_name = os.path.splitext(args.input_file)[0]
    output_file = f'{file_name}_pp.jsonl'
    with jsonlines.open(output_file, 'w') as writer:
        writer.write_all(output_datas)

    json_path = f'json_files/{output_file}'
    # The mirror directory (including any sub-directories inherited from the
    # input path) may not exist yet; create it rather than fail on open().
    os.makedirs(os.path.dirname(json_path), exist_ok=True)
    with open(json_path, 'w', encoding='utf-8') as writer:
        json.dump(output_datas, writer)


# Run the post-processing only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
