import librosa
import numpy as np
import soundfile as sf
from typing import Tuple, Optional
import logging

# Module-level logger; handler and level configuration is left to the application.
logger = logging.getLogger(__name__)


class AudioProcessor:
    """Audio helper: loading, duration, silence-ratio and SNR estimation.

    All frame-level measurements are based on ``librosa.feature.rms`` called
    with its default framing parameters.
    """

    def __init__(self, sample_rate: int = 16000):
        """
        Create a processor.

        Args:
            sample_rate: Rate passed to ``librosa.load`` as ``sr`` and used as
                the default rate when computing durations.
        """
        self.sample_rate = sample_rate

    def load_audio(self, file_path: str) -> Tuple[np.ndarray, int]:
        """
        Load an audio file through ``librosa.load``.

        Args:
            file_path: Path of the audio file.

        Returns:
            audio_data: The decoded samples.
            sample_rate: The sample rate reported by ``librosa.load``.

        Raises:
            Exception: Whatever ``librosa.load`` raises; it is logged and
                re-raised unchanged.
        """
        try:
            audio_data, sample_rate = librosa.load(file_path, sr=self.sample_rate)
            return audio_data, sample_rate
        except Exception as e:
            logger.error("Failed to load audio file %s: %s", file_path, e)
            raise

    def get_duration(
        self, audio_data: np.ndarray, sample_rate: Optional[int] = None
    ) -> float:
        """
        Compute the duration of an audio buffer.

        Args:
            audio_data: Audio samples; ``len(audio_data)`` is taken as the
                number of samples.
            sample_rate: Rate the samples were actually loaded at. Defaults to
                ``self.sample_rate`` when omitted.

        Returns:
            duration: Duration in seconds.

        Raises:
            ValueError: If no positive sample rate is available.
        """
        rate = self.sample_rate if sample_rate is None else sample_rate
        if rate is None or rate <= 0:
            raise ValueError(
                f"A positive sample rate is required to compute duration, got {rate!r}"
            )
        return float(len(audio_data) / rate)

    def calculate_silence_ratio(
        self, audio_data: np.ndarray, threshold_db: float = -40.0
    ) -> float:
        """
        Compute the fraction of RMS frames that fall below a dB threshold.

        Args:
            audio_data: Audio samples.
            threshold_db: Frames whose RMS level (in dB) is strictly below
                this value count as silence.

        Returns:
            silence_ratio: Number of silent frames divided by the total number
                of frames. ``1.0`` is returned for empty input or when the
                computation fails (the failure is logged).
        """
        try:
            # Nothing to analyse: treat as entirely silent, same value as the
            # failure fallback below, without emitting an error log.
            if np.size(audio_data) == 0:
                return 1.0

            # Per-frame RMS amplitude.
            frame_rms = librosa.feature.rms(y=audio_data)[0]

            # Amplitude -> dB; the small offset avoids log10(0).
            frame_db = 20 * np.log10(frame_rms + 1e-10)

            silent_frames = frame_db < threshold_db

            # Built-in float so the value matches the annotation and can be
            # serialized without NumPy-specific handling.
            return float(np.sum(silent_frames) / len(silent_frames))
        except Exception as e:
            logger.error("Failed to calculate silence ratio: %s", e)
            return 1.0  # 1.0 means "all silence"

    def calculate_snr(self, audio_data: np.ndarray) -> float:
        """
        Estimate the signal-to-noise ratio (SNR).

        This is a simplified heuristic, not a real voice-activity detector:
        frames whose RMS is above the median RMS are treated as speech, the
        remaining frames as noise.

        Args:
            audio_data: Audio samples.

        Returns:
            snr: Estimated SNR in dB. ``0.0`` is returned when the input is
                empty, when the frames cannot be split into two non-empty
                groups, or when the computation fails (the failure is logged).
        """
        try:
            # Nothing to analyse: same value as the "cannot compute" result.
            if np.size(audio_data) == 0:
                return 0.0

            frame_rms = librosa.feature.rms(y=audio_data)[0]

            # The median frame RMS is the speech/noise split point.
            threshold = np.median(frame_rms)

            speech_mask = frame_rms > threshold
            noise_mask = frame_rms <= threshold

            if not speech_mask.any() or not noise_mask.any():
                return 0.0  # SNR cannot be computed

            speech_level = np.mean(frame_rms[speech_mask])
            noise_level = np.mean(frame_rms[noise_mask])

            # RMS values are amplitudes, hence 20*log10; the small offset
            # avoids division by zero for digitally silent noise frames.
            snr = 20 * np.log10(speech_level / (noise_level + 1e-10))

            # Built-in float: librosa may hand back float32 values, which are
            # not JSON-serializable NumPy scalars.
            return float(snr)
        except Exception as e:
            logger.error("Failed to calculate SNR: %s", e)
            return 0.0

    def check_audio_quality(
        self,
        file_path: str,
        min_duration: float = 3.0,
        max_duration: float = 15.0,
        max_silence_ratio: float = 0.3,
        min_snr: float = 10.0,
    ) -> Tuple[bool, dict]:
        """
        Run all quality checks on an audio file.

        Args:
            file_path: Path of the audio file.
            min_duration: Minimum accepted duration in seconds (inclusive).
            max_duration: Maximum accepted duration in seconds (inclusive).
            max_silence_ratio: Maximum accepted silence ratio (inclusive).
            min_snr: Minimum accepted SNR in dB (inclusive).

        Returns:
            is_qualified: ``True`` only if every individual check passes.
            metrics: Dict with ``duration``, ``silence_ratio``, ``snr`` and the
                per-check flags ``duration_ok``, ``silence_ok``, ``snr_ok``.
                If anything raises, ``(False, {"error": <message>})`` is
                returned instead and the failure is logged.
        """
        try:
            audio_data, sample_rate = self.load_audio(file_path)

            # Use the rate the samples were actually loaded at, so the
            # duration is right even if it differs from self.sample_rate.
            duration = self.get_duration(audio_data, sample_rate)
            silence_ratio = self.calculate_silence_ratio(audio_data)
            snr = self.calculate_snr(audio_data)

            # bool(...) keeps NumPy booleans out of the returned values.
            duration_ok = bool(min_duration <= duration <= max_duration)
            silence_ok = bool(silence_ratio <= max_silence_ratio)
            snr_ok = bool(snr >= min_snr)

            is_qualified = duration_ok and silence_ok and snr_ok

            metrics = {
                "duration": duration,
                "silence_ratio": silence_ratio,
                "snr": snr,
                "duration_ok": duration_ok,
                "silence_ok": silence_ok,
                "snr_ok": snr_ok,
            }

            return is_qualified, metrics

        except Exception as e:
            logger.error("Failed to check audio quality for %s: %s", file_path, e)
            return False, {"error": str(e)}
