{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f0619c5e",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import glob\n",
    "import pandas as pd\n",
    "from sklearn.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a0ebb731",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Collect the names of the raw CSV inputs that sit in `directory`.\n",
    "directory = './'\n",
    "\n",
    "# The split cell writes '<prefix>_train.csv' / '<prefix>_test.csv' into this\n",
    "# same directory, and those files also match '*.csv'. Skip them so that\n",
    "# re-running the notebook does not split the generated splits again.\n",
    "generated_suffixes = ('_train.csv', '_test.csv')\n",
    "\n",
    "# glob returns paths in arbitrary order; sort for a stable processing order.\n",
    "csv_paths = sorted(glob.glob(os.path.join(directory, '*.csv')))\n",
    "file_names = [\n",
    "    os.path.basename(path)\n",
    "    for path in csv_paths\n",
    "    if not path.endswith(generated_suffixes)\n",
    "]\n",
    "print(file_names)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e9eb8733",
   "metadata": {},
   "outputs": [],
   "source": [
    "# For every input CSV: normalise the column names, split the rows 70/30 and\n",
    "# write '<prefix>_train.csv' / '<prefix>_test.csv' next to the input.\n",
    "output_columns = ['id', 'sentence', 'labels']\n",
    "\n",
    "for file_name in file_names:\n",
    "    raw_df = pd.read_csv(os.path.join(directory, file_name))\n",
    "    # rename() returns a new frame; columns absent from the file are ignored.\n",
    "    renamed_df = raw_df.rename(columns={'class': 'labels', 'text': 'sentence'})\n",
    "\n",
    "    # Fixed random_state keeps the split reproducible across runs.\n",
    "    train_df, test_df = train_test_split(renamed_df, test_size=0.3, random_state=42)\n",
    "    train_df = train_df.reset_index(drop=True)\n",
    "    test_df = test_df.reset_index(drop=True)\n",
    "\n",
    "    # Strip the extension before taking the part in front of the first '-'.\n",
    "    # Without this, a name with no '-' (e.g. 'data.csv') produced outputs such\n",
    "    # as 'data.csv_train.csv'. Names containing '-' yield the same prefix as before.\n",
    "    # NOTE(review): inputs that share a prefix before '-' overwrite each other's\n",
    "    # outputs - confirm input names are unique up to the first '-'.\n",
    "    new_name = os.path.splitext(file_name)[0].split('-')[0]\n",
    "    new_train_name = f'{new_name}_train.csv'\n",
    "    new_test_name = f'{new_name}_test.csv'\n",
    "\n",
    "    # Selecting output_columns raises KeyError if the input lacks any of them.\n",
    "    train_df[output_columns].to_csv(os.path.join(directory, new_train_name), index=False)\n",
    "    test_df[output_columns].to_csv(os.path.join(directory, new_test_name), index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "topo",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
