import argparse
import json
import os

import jsonlines

from common.utils import extract_after


def main():
    """Post-process a JSON Lines file of responses into reassembled tasks.

    Command-line arguments:
        --input_file: path to the JSON Lines file to read (required).

    Each input record must provide ``item['response']`` and
    ``item['metadata']['raw_task']``. Records for which
    ``extract_after(response, 'Question')`` returns ``None`` are skipped.
    The number of kept records is printed to stdout.

    Two files are written:
        * ``<input path without extension>_pp.jsonl`` -- one record per line.
        * ``json_files/<that same path>`` -- all records as one JSON array.

    Raises:
        KeyError: if a record lacks ``response``, ``metadata`` or
            ``metadata['raw_task']``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--input_file', type=str, required=True)
    args = parser.parse_args()

    output_datas = []
    with jsonlines.open(args.input_file) as reader:
        for item in reader:
            task = extract_after(item['response'], 'Question')
            if task is None:
                continue
            metadata = item['metadata']
            output_datas.append({
                'reassemble_task': task,
                'raw_task': metadata['raw_task'],
                'metadata': metadata,
            })
    print(len(output_datas))

    # splitext only strips a real trailing extension, so inputs such as
    # 'foo', './data/foo' or 'dir.v2/foo' keep their full stem instead of
    # being truncated at an unrelated dot.
    file_name, _ = os.path.splitext(args.input_file)
    output_file = f'{file_name}_pp.jsonl'
    with jsonlines.open(output_file, 'w') as writer:
        writer.write_all(output_datas)

    # NOTE(review): this copy holds a single JSON array even though its name
    # ends in '.jsonl'; the name is kept unchanged for existing consumers.
    json_path = f'json_files/{output_file}'
    # Create 'json_files' (and any sub-directories carried over from the
    # input path) so the second write does not fail on a fresh checkout.
    os.makedirs(os.path.dirname(json_path), exist_ok=True)
    with open(json_path, 'w') as writer:
        json.dump(output_datas, writer)


# Run the post-processing only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
