import argparse

def _find_summary_line(lines):
    """Return the last entry of *lines* that reports every summary metric.

    A line qualifies when it contains the text ``loss`` and a ``key=`` token
    for each of ``ppl``, ``acc``, ``ppl_std`` and ``acc_std``.

    Args:
        lines: The log file's lines, in file order.

    Returns:
        The matching line, unmodified.

    Raises:
        ValueError: If no line qualifies (this includes an empty log).
    """
    markers = ('loss', 'ppl=', 'acc=', 'ppl_std=', 'acc_std=')
    # Scan from the end so the most recent report wins.
    for line in reversed(lines):
        if all(marker in line for marker in markers):
            return line
    raise ValueError(
        'no line reporting loss, ppl, acc, ppl_std and acc_std was found'
    )


def _parse_metric(line, key):
    """Extract the value that follows the last ``key=`` in *line*.

    The value ends at the first ``,`` or ``]`` (or at the end of the line),
    so the result does not depend on the order of the metrics in the line.

    Args:
        line: A log line containing ``key=``.
        key: Metric name without the trailing ``=``.

    Returns:
        The string ``'NaN'`` when the log holds exactly ``NaN``; otherwise
        the value converted with ``float``.

    Raises:
        ValueError: If the text after ``key=`` is not a valid float.
    """
    raw = line.rpartition(f'{key}=')[2]
    for terminator in (',', ']'):
        raw = raw.split(terminator, 1)[0]
    raw = raw.strip()

    # 'NaN' is passed through verbatim so the output keeps that spelling.
    if raw == 'NaN':
        return 'NaN'
    try:
        return float(raw)
    except ValueError as err:
        raise ValueError(
            f'cannot parse {key!r} value {raw!r} in log line: {line.strip()!r}'
        ) from err


def main(argv=None):
    """Append the final metrics found in a log file to an output file.

    Reads the log, locates the last line that reports loss/ppl/acc/ppl_std/
    acc_std, and appends one line of the form
    ``<dataset>,<seq_len>. ppl:<v>; acc:<v>; ppl_std:<v>; acc_std:<v>``
    to the output file.

    Args:
        argv: Command-line arguments without the program name. ``None``
            makes argparse read ``sys.argv``.

    Raises:
        SystemExit: Via ``parser.error`` when a required option is missing,
            no summary line exists, or a metric cannot be parsed. Nothing is
            written to the output file in those cases.
    """
    parser = argparse.ArgumentParser(description='Postprocess the generated log')
    parser.add_argument('--dataset', type=str, help='The dataset name')
    parser.add_argument('--seq_len', type=str, help='The sequence length')
    parser.add_argument('--log_file_path', type=str, required=True,
                        help='The path to the log file')
    parser.add_argument('--output_file_path', type=str, required=True,
                        help='The path to the output file')

    args = parser.parse_args(argv)

    # Undecodable bytes (e.g. progress-bar glyphs) are replaced rather than
    # aborting the run; the metric text itself is assumed to be ASCII.
    with open(args.log_file_path, 'r', errors='replace') as f:
        lines = f.readlines()

    try:
        summary = _find_summary_line(lines)
        ppl, acc, ppl_std, acc_std = (
            _parse_metric(summary, key)
            for key in ('ppl', 'acc', 'ppl_std', 'acc_std')
        )
    except ValueError as err:
        # Fail loudly instead of recording numbers taken from the wrong line.
        parser.error(f'{args.log_file_path}: {err}')

    # Append so that successive runs accumulate in the same output file.
    with open(args.output_file_path, 'a') as f:
        f.write(f'{args.dataset},{args.seq_len}. ppl:{ppl}; acc:{acc}; ppl_std:{ppl_std}; acc_std:{acc_std}\n')


if __name__ == '__main__':
    main()
