import pandas as pd
import torch
from sklearn.model_selection import train_test_split

# Split all.csv into a 99 % training set and a 1 % test set, write both
# partitions to CSV, and record which original rows went where.

# 1. Load the full dataset.
df = pd.read_csv('all.csv')

# 2. Hold out 1 % of the rows as the test set; the fixed seed makes the split
#    reproducible.
#    NOTE: train_test_split returns, for every array passed in, the *train*
#    partition first and the *test* partition second. The unpacking order
#    below must therefore be train, test, train, test -- swapping the names
#    would silently write the 1 % partition to train.csv.
train_df, test_df, train_idx, test_idx = train_test_split(
    df,                    # data
    df.index,              # matching original index labels
    test_size=0.01,        # 1 % test set
    random_state=42,       # fixed seed
    shuffle=True,          # shuffle before splitting
)

# 3. Save the two partitions. index=False drops the original index labels
#    from the CSV files; they are persisted separately in step 4.
train_df.to_csv('train.csv', index=False)
test_df.to_csv('test.csv', index=False)

# 4. Save the original index labels of each partition to a .pt file as plain
#    Python lists.
split_dict = {'train': train_idx.tolist(), 'test': test_idx.tolist()}
torch.save(split_dict, 'split_idx.pt')

print('划分完成！')
print('train shape:', train_df.shape)
print('test  shape:', test_df.shape)