#!/bin/bash
#
# Copies shard pairs (c4_llama2_<NNNNN>_text_document.bin / .idx) from
# SOURCE_DIR to the remote DEST_DIR with scp, one pair per scp invocation.
#
# Shards FIRST_SHARD..LAST_SHARD (inclusive) are processed. A shard is only
# transferred when both of its files exist locally; otherwise it is skipped
# with a warning. The loop is best-effort: a failed scp does not stop the
# remaining shards, but the script exits with status 1 at the end if any
# transfer failed.

# Abort on use of unset variables; -e is deliberately NOT set so that one
# failed shard does not end the whole run.
set -uo pipefail

# Configuration
readonly PORT=52223
readonly SOURCE_DIR="/mnt/nas_v1/self-define/hurizhen/llm_data/c4_data/separate_c4_llama2_pretokenized"
readonly DEST_DIR="root@101.71.91.166:/mnt/gxn/hrz/llm_data/c4_data/separate_c4_llama2_pretokenized"
# Inclusive shard index range handled by this run.
readonly FIRST_SHARD=200
readonly LAST_SHARD=1023

# Zero-padded labels so the banner always reflects the real range.
printf -v first_label '%05d' "$FIRST_SHARD"
printf -v last_label '%05d' "$LAST_SHARD"

echo "开始验证并传输文件（从${first_label}到${last_label}）"
total_transferred=0   # number of individual files copied successfully
failed_shards=0       # shards whose scp invocation returned non-zero
missing_shards=0      # shards skipped because a local file was absent

for ((i = FIRST_SHARD; i <= LAST_SHARD; i++)); do
    # Shard index as a 5-digit, zero-padded string (no subshell needed).
    printf -v num '%05d' "$i"

    bin_file="${SOURCE_DIR}/c4_llama2_${num}_text_document.bin"
    idx_file="${SOURCE_DIR}/c4_llama2_${num}_text_document.idx"

    # Both files of the pair must be present before anything is sent.
    if [[ -f "$bin_file" && -f "$idx_file" ]]; then
        echo "传输 ${num} 的文件..."
        # Test scp directly instead of inspecting $? afterwards.
        if scp -P "$PORT" "$bin_file" "$idx_file" "$DEST_DIR"; then
            total_transferred=$((total_transferred + 2))
            echo "√ 传输成功"
        else
            failed_shards=$((failed_shards + 1))
            echo "× 传输失败"
        fi
    else
        missing_shards=$((missing_shards + 1))
        echo "警告：文件 ${num} 缺失，跳过"
        # Report only the file(s) that are actually absent.
        for candidate in "$bin_file" "$idx_file"; do
            [[ -f "$candidate" ]] || echo "未找到: $candidate"
        done
    fi
done

echo "文件传输完成！"
echo "成功传输文件数: $total_transferred"
echo "传输失败的分片数: $failed_shards"
echo "缺失跳过的分片数: $missing_shards"
echo "如需传输完整数据集，请确保源目录包含所有文件"

# Surface transfer failures to the caller; skipped (missing) shards remain
# warnings only, as before.
if ((failed_shards > 0)); then
    exit 1
fi
