import pandas as pd

# Merge several instruction-style datasets into one JSON Lines file with a
# shared three-column layout: en_input / en_other / en_output.
#
# NOTE: DataFrame.append was removed in pandas 2.0; pd.concat with
# ignore_index=True is the supported equivalent of
# `a.append(b).reset_index(drop=True)`.

# Target column layout shared by every source once normalised.
COLUMNS = ['en_input', 'en_other', 'en_output']

# --- Load the raw sources ---------------------------------------------------
df = pd.read_json('', lines=True)
add_df = pd.read_json('', lines=True)
alpaca_df = pd.read_parquet('')
literature_df = pd.read_csv('')

# --- Base dataset: main file plus its supplementary file ---------------------
# Both frames must provide 'en_input' and 'en_output' (selected below);
# 'en_other' is created here as an empty string for every row.
df = pd.concat([df, add_df], ignore_index=True)
df['en_other'] = ''

# --- Alpaca-format dataset: map its columns onto the shared layout -----------
alpaca_df = alpaca_df.assign(
    en_input=alpaca_df['instruction'],
    en_other=alpaca_df['input'],
    en_output=alpaca_df['output'],
)

# --- Literature dataset: already has en_input / en_output --------------------
literature_df['en_other'] = ''

# --- Combine everything, keeping only the shared columns ---------------------
# Order is preserved: base rows, then Alpaca rows, then literature rows.
df = pd.concat(
    [df[COLUMNS], alpaca_df[COLUMNS], literature_df[COLUMNS]],
    ignore_index=True,
)

print(len(df))

# force_ascii=False keeps non-ASCII characters unescaped in the output file.
df.to_json('', orient='records', lines=True, force_ascii=False)