#!/usr/bin/env python3
"""
MP4 to WAV Converter
将指定目录及其子目录下的所有mp4文件转换为wav文件，保存到mp4文件的相同目录。
"""

import argparse
import os
import sys
import subprocess
import time
from pathlib import Path


def check_ffmpeg():
    """Report whether a usable ``ffmpeg`` executable can be launched.

    Runs ``ffmpeg -version`` and treats a zero exit status as success.
    The command's output is discarded because only the exit status matters.

    Returns:
        bool: True if the command started and exited with status 0.
        False if it could not be started (not found on PATH, not
        executable, or any other OS-level launch failure) or if it exited
        with a non-zero status.
    """
    try:
        subprocess.run(
            ["ffmpeg", "-version"],
            stdin=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=True,
        )
    # OSError covers FileNotFoundError as well as PermissionError and other
    # launch failures, so a broken installation is reported as "unavailable"
    # instead of crashing the caller.
    except (subprocess.CalledProcessError, OSError):
        return False
    return True


def convert_mp4_to_wav(mp4_file, wav_file):
    """Extract the audio of an MP4 file into a WAV file by invoking ffmpeg.

    The output is 16-bit little-endian PCM, 44.1 kHz, stereo. An existing
    output file is overwritten (``-y``).

    Args:
        mp4_file: Path of the input file (anything accepted by ``str()``).
        wav_file: Path of the output file (anything accepted by ``str()``).

    Returns:
        tuple: ``(True, None)`` when ffmpeg exits with status 0, otherwise
        ``(False, message)`` where ``message`` describes the failure. For a
        non-zero ffmpeg exit the message is ``"ffmpeg error: "`` followed by
        ffmpeg's stderr output. This function does not raise for ordinary
        failures; they are reported through the return value.
    """
    try:
        cmd = [
            "ffmpeg",
            "-i",
            str(mp4_file),  # input file
            "-vn",  # drop the video stream
            "-acodec",
            "pcm_s16le",  # audio codec: 16-bit PCM
            "-ar",
            "44100",  # sample rate: 44.1 kHz
            "-ac",
            "2",  # channel count: stereo
            "-y",  # overwrite the output file without asking
            str(wav_file),  # output file
        ]

        subprocess.run(
            cmd,
            # Keep ffmpeg from reading (and consuming) the caller's stdin.
            stdin=subprocess.DEVNULL,
            capture_output=True,
            text=True,
            # ffmpeg echoes file metadata on stderr, which may contain bytes
            # that are not valid in the locale encoding. Strict decoding
            # would raise after ffmpeg already succeeded and the conversion
            # would be misreported as failed.
            errors="replace",
            check=True,
        )
        return True, None

    except subprocess.CalledProcessError as e:
        return False, f"ffmpeg error: {e.stderr}"
    except Exception as e:
        # Launch failures (e.g. ffmpeg missing) and anything unexpected are
        # reported to the caller rather than propagated.
        return False, str(e)


def find_mp4_files(dataset_root):
    """Recursively collect every MP4 file below a directory.

    A file qualifies when its name ends with ``.mp4`` in any letter case
    (``.mp4``, ``.MP4``, ``.Mp4``, ...). Directories whose names happen to
    end with ``.mp4`` are ignored.

    Args:
        dataset_root: Directory to search (string or path-like).

    Returns:
        list[pathlib.Path]: The matching files in sorted order, each listed
        exactly once.

    Raises:
        FileNotFoundError: If ``dataset_root`` does not exist.
        NotADirectoryError: If ``dataset_root`` exists but is not a directory.
    """
    dataset_path = Path(dataset_root)

    if not dataset_path.exists():
        raise FileNotFoundError(f"目录不存在: {dataset_root}")

    if not dataset_path.is_dir():
        raise NotADirectoryError(f"不是一个目录: {dataset_root}")

    # One traversal with a case-insensitive name check: this picks up
    # mixed-case extensions and cannot yield the same file twice, unlike
    # separate "*.mp4" / "*.MP4" globs on case-insensitive platforms.
    return sorted(
        entry
        for entry in dataset_path.rglob("*")
        if entry.name.lower().endswith(".mp4") and entry.is_file()
    )


def main():
    """Command-line entry point: convert every MP4 under a directory to WAV.

    Parses the command line, verifies that ffmpeg is available, scans the
    given directory for MP4 files and converts each one to a ``.wav`` file
    next to it. Existing WAV files are skipped unless ``--overwrite`` is
    given; ``--dry-run`` only prints the planned conversions.

    Returns:
        int: 0 when nothing failed (including "no files found" and dry
        runs); 1 when at least one conversion failed, the directory is
        invalid, the user interrupted the run, or an unexpected error
        occurred. When ffmpeg is unavailable the process exits directly
        with status 1.
    """
    arg_parser = argparse.ArgumentParser(
        description="将指定目录及其子目录下的所有mp4文件转换为wav文件"
    )
    arg_parser.add_argument("dataset_root", help="数据集根目录路径")
    arg_parser.add_argument(
        "--overwrite", action="store_true", help="覆盖已存在的wav文件"
    )
    arg_parser.add_argument(
        "--dry-run", action="store_true", help="只显示将要转换的文件，不执行实际转换"
    )
    options = arg_parser.parse_args()

    # Nothing can be converted without ffmpeg, so bail out with install hints.
    if not check_ffmpeg():
        install_hints = (
            "错误: 系统中未找到ffmpeg，请先安装ffmpeg",
            "Ubuntu/Debian: sudo apt install ffmpeg",
            "CentOS/RHEL: sudo yum install ffmpeg",
            "macOS: brew install ffmpeg",
        )
        for hint in install_hints:
            print(hint)
        sys.exit(1)

    try:
        print(f"正在扫描目录: {options.dataset_root}")
        sources = find_mp4_files(options.dataset_root)

        if not sources:
            print("未找到任何mp4文件")
            return 0

        total = len(sources)
        print(f"找到 {total} 个mp4文件")

        # Dry run: list source -> target pairs and stop.
        if options.dry_run:
            print("\n预览转换计划:")
            for source in sources:
                target = source.with_suffix(".wav")
                print(f"  {source} → {target}")
            return 0

        tally = {"ok": 0, "failed": 0, "skipped": 0}
        started = time.time()

        for index, source in enumerate(sources, 1):
            target = source.with_suffix(".wav")
            print(f"\n[{index}/{total}] 正在处理: {source.name}")

            # Leave existing outputs alone unless overwriting was requested.
            if target.exists() and not options.overwrite:
                print(f"  跳过 (wav文件已存在): {target.name}")
                tally["skipped"] += 1
                continue

            converted, problem = convert_mp4_to_wav(source, target)
            if not converted:
                print(f"  ✗ 转换失败: {problem}")
                tally["failed"] += 1
            else:
                print(f"  ✓ 转换成功: {target.name}")
                tally["ok"] += 1

        elapsed = time.time() - started

        # Summary report.
        rule = "=" * 60
        print("\n" + rule)
        print("转换完成统计")
        print(rule)
        print(f"总处理时间: {elapsed:.2f} 秒")
        print(f"成功转换: {tally['ok']} 个文件")
        print(f"转换失败: {tally['failed']} 个文件")
        print(f"跳过文件: {tally['skipped']} 个文件")
        print(f"总文件数: {total} 个文件")

        if tally["failed"] > 0:
            print(f"\n警告: {tally['failed']} 个文件转换失败")
            return 1

        print("\n✓ 所有文件转换完成!")
        return 0

    except (FileNotFoundError, NotADirectoryError) as exc:
        # Raised by find_mp4_files for a missing or non-directory root.
        print(f"错误: {exc}")
        return 1
    except KeyboardInterrupt:
        print("\n\n用户中断操作")
        return 1
    except Exception as exc:
        print(f"意外错误: {exc}")
        return 1


# Run the CLI only when executed as a script (not when imported); the value
# returned by main() becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
