import argparse
import json
import os

import jsonlines

from common.utils import extract_after


def _build_records(items):
    """Turn raw response items into output records.

    Each item is expected to carry a 'response' string and a 'metadata'
    dict with 'raw_task', 'solution', 'answer' and 'level' keys. Items for
    which ``extract_after`` returns None are skipped.

    Args:
        items: Iterable of dicts read from the input JSONL file.

    Returns:
        A list of record dicts, in input order.

    Raises:
        KeyError: If a kept item lacks one of the expected keys.
    """
    records = []
    for item in items:
        # NOTE(review): extract_after presumably returns the text following
        # the 'Assembled Text' marker; confirm against common.utils. Only the
        # None result is relied on here, as the "skip this item" signal.
        unclear_task = extract_after(item['response'], 'Assembled Text')
        if unclear_task is None:
            continue
        metadata = item['metadata']
        records.append({
            'unclear_task': unclear_task,
            'raw_task': metadata['raw_task'],
            'reason_of_unclearness': 'The problem lacks a specified goal to answer.',
            'solution': metadata['solution'],
            'answer': metadata['answer'],
            'level': metadata['level'],
            'metadata': metadata,
        })
    return records


def _output_paths(input_file):
    """Return the (jsonl_path, json_path) pair derived from *input_file*.

    Only the final extension is stripped, so dots in directory names and
    extension-less file names no longer yield an empty stem.
    """
    stem, _ = os.path.splitext(input_file)
    jsonl_path = f'{stem}_pp.jsonl'
    # NOTE(review): the JSON copy keeps the '.jsonl' suffix even though it
    # holds a single JSON array; the name is left unchanged so existing
    # consumers of this path keep working.
    json_path = f'json_files/{jsonl_path}'
    return jsonl_path, json_path


def main():
    """Post-process a JSONL file of responses into "unclear task" records.

    Reads the file given by ``--input_file``, builds one record per item
    whose response yields a non-None extraction, prints the number of
    records, and writes them twice: as JSONL next to the input
    (``<stem>_pp.jsonl``) and as one JSON array under ``json_files/``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--input_file', type=str, required=True)
    args = parser.parse_args()

    with jsonlines.open(args.input_file) as reader:
        records = _build_records(reader)
    print(len(records))

    jsonl_path, json_path = _output_paths(args.input_file)
    with jsonlines.open(jsonl_path, 'w') as writer:
        writer.write_all(records)

    # Create the target directory first; otherwise the second write fails
    # with FileNotFoundError after the first output was already produced.
    os.makedirs(os.path.dirname(json_path), exist_ok=True)
    with open(json_path, 'w', encoding='utf-8') as handle:
        json.dump(records, handle)


# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()
