import json
from pathlib import Path

import pandas as pd

# Input CSV, resolved relative to the current working directory.
CSV_FILE = 'final_agent_tasks_w_harm_idx.csv'

# Directory that receives one '<subset>_test_data.json' file per subset.
OUTPUT_DIR = '../../data/agent_tasks'

# Values of the CSV 'Label' column that each get their own output file.
SUBSETS = ('Original', 'Ethan', 'Stephanie', 'Ricky', 'Alicia', 'Jose', 'Chanel')


def _build_task_frame(df, kind, harm_index):
    """Return the task table for one kind of task ('harmful' or 'benign').

    Args:
        df: The raw table read from the CSV. It must contain the columns
            '<Kind>-Original Task', '<Kind>-Subtasks' and 'Label'.
        kind: 'harmful' or 'benign'. Selects the source columns and is used
            both as the 'label' value and as the prefix of the row ids.
        harm_index: Value(s) for the 'harm_index' column: a Series aligned
            with ``df``, or ``None`` to leave the column empty.

    Returns:
        A DataFrame with the columns harm_index, original_task,
        decomposition, source_label, label and id.
    """
    column_prefix = kind.capitalize()
    frame = pd.DataFrame({
        'harm_index': harm_index,
        'original_task': df[f'{column_prefix}-Original Task'],
        'decomposition': df[f'{column_prefix}-Subtasks'],
        'source_label': df['Label'],
    })
    frame['label'] = kind
    # Ids are numbered by row position before any per-subset filtering, so a
    # harmful row and its benign counterpart share the same numeric suffix.
    frame['id'] = [f'{kind}_{position:04d}' for position in range(len(frame))]
    return frame


def _missing_to_none(value):
    """Map a missing scalar (NaN, None, pd.NA, NaT) to None; pass others through."""
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return None
    return value


def _to_json_records(frame):
    """Convert ``frame`` to a list of row dicts that serialise to valid JSON.

    Missing cells are stored by pandas as float NaN, which ``json.dump``
    would write as the bare token ``NaN`` (not valid JSON). They are replaced
    with ``None`` here so they are written as ``null``.
    """
    return [
        {column: _missing_to_none(value) for column, value in record.items()}
        for record in frame.to_dict(orient='records')
    ]


def main(csv_path=CSV_FILE, output_dir=OUTPUT_DIR):
    """Split the task CSV into one JSON test file per source label.

    For every name in ``SUBSETS`` this writes
    ``<output_dir>/<subset>_test_data.json`` containing the harmful records
    whose 'Label' equals the subset, followed by the matching benign records.
    A subset with no matching rows still gets a file holding an empty list.

    Args:
        csv_path: Path of the CSV file to read.
        output_dir: Directory for the JSON files; created if it is missing.
    """
    df = pd.read_csv(csv_path)

    harmful_df = _build_task_frame(df, 'harmful', df['harmful_action_idx'])
    benign_df = _build_task_frame(df, 'benign', None)

    destination = Path(output_dir)
    destination.mkdir(parents=True, exist_ok=True)

    for subset in SUBSETS:
        # The two tables are serialised separately (harmful first) instead of
        # being concatenated, so the all-None benign 'harm_index' column never
        # has to share a dtype with the harmful one.
        records = []
        for frame in (harmful_df, benign_df):
            selected = frame[frame['source_label'] == subset]
            records.extend(_to_json_records(selected))

        output_path = destination / f'{subset}_test_data.json'
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(records, f, ensure_ascii=False, indent=2)


if __name__ == '__main__':
    main()