import pandas as pd

# Category labels; each one is written to the "category" field of the question
# records, and one set of model-output CSVs is converted per label.
task = ['分析', '常识推理', '推荐', '编辑', '问答', '其他', '分类', '开放式生成', '描述', '识别', '写信', '判断',
        '抽取', '摘要', '转换']

# Placeholder substituted for missing CSV cells before any string handling.
BLANK = ' '


def llama2_prompt(instruction):
    """Return the instruction-template prompt that is stripped from `llama2_df` generations."""
    header = 'Below is an instruction that describes a task. Write a response that appropriately completes the request.\n'
    return header + '### Instruction:\n' + instruction + '\n\n### Response:\n'


def vicuna_prompt(instruction):
    """Return the chat-template prompt that is stripped from `vicuna_df` generations."""
    return f"A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: {instruction} ASSISTANT:"


def echo_prompt(instruction):
    """Return the bare instruction (no template is applied for chatglm / llama2_pre)."""
    return instruction


def question_ids(df):
    """Return the 1-based ids ``1..len(df)``, one per row of *df*."""
    return list(range(1, len(df) + 1))


def build_questions(df, category):
    """Build the question records for one task category.

    Args:
        df: Frame with an ``en_input`` column.
        category: Label stored in every row's ``category`` field.

    Returns:
        A new DataFrame with columns ``question_id``, ``text``, ``category``.
        Missing ``en_input`` cells are passed through unchanged.
    """
    questions = pd.DataFrame({'question_id': question_ids(df)})
    # Plain list so the values are assigned positionally, whatever df's index is.
    questions['text'] = df['en_input'].tolist()
    questions['category'] = category
    return questions


def build_answers(df, make_prompt):
    """Build the answer records for one model, removing the echoed prompt.

    Args:
        df: Frame with ``en_input`` and ``generation`` columns.
        make_prompt: Callable mapping one ``en_input`` value to the exact
            prompt string that should be removed from that row's generation.

    Returns:
        A new DataFrame with columns ``question_id`` and ``text``; *df* itself
        is not modified.
    """
    # Missing cells become a blank string so str.replace never sees a float NaN.
    instructions = df['en_input'].fillna(BLANK)
    generations = df['generation'].fillna(BLANK)

    texts = []
    for instruction, generation in zip(instructions, generations):
        prompt = make_prompt(instruction)
        # A whitespace-only prompt (i.e. a missing en_input with no template
        # around it) must not be "removed": replace(' ', '') would delete
        # every space in the generation.
        if prompt.strip():
            generation = generation.replace(prompt, '')
        texts.append(generation)

    return pd.DataFrame({'question_id': question_ids(df), 'text': texts})


def main():
    """Convert each task's model-output CSVs into JSON-lines question/answer files."""
    for t in task:
        llama2_pre_df = pd.read_csv('')
        chatglm_df = pd.read_csv('')
        llama2_df = pd.read_csv('')
        vicuna_df = pd.read_csv('')

        # Questions are taken from the llama2 frame's en_input column.
        question = build_questions(llama2_df, t)
        question.to_json(f'', orient='records', lines=True,
                         force_ascii=False)

        llama2_df = build_answers(llama2_df, llama2_prompt)
        llama2_df.to_json(f'', orient='records',
                          lines=True,
                          force_ascii=False)

        vicuna_df = build_answers(vicuna_df, vicuna_prompt)
        vicuna_df.to_json(f'', orient='records', lines=True,
                          force_ascii=False)

        chatglm_df = build_answers(chatglm_df, echo_prompt)
        chatglm_df.to_json(f'', orient='records',
                           lines=True,
                           force_ascii=False)

        llama2_pre_df = build_answers(llama2_pre_df, echo_prompt)
        llama2_pre_df.to_json(f'', orient='records',
                              lines=True,
                              force_ascii=False)

        # Row counts per output, as a quick sanity check that the files line up.
        print(len(llama2_pre_df), len(vicuna_df), len(llama2_df), len(chatglm_df), len(question))


if __name__ == '__main__':
    main()
