import pandas as pd
import numpy as np

# ---------- Common utilities ----------
def robust_z_iqr(x, eps=1e-9):
    """Return a heavy-tail-robust z-score of ``x`` based on median and IQR.

    Each value is centred on the median and divided by the interquartile
    range (75th minus 25th percentile) instead of the mean / standard
    deviation pair used by a classic z-score.

    Args:
        x: Array-like of numbers; converted to a float64 array.
        eps: Lower bound applied to the IQR so the division never uses a
            zero (or negative) scale.

    Returns:
        A float64 array holding ``(x - median) / max(IQR, eps)``.
    """
    values = np.asarray(x, dtype=np.float64)
    lower_q, upper_q = np.percentile(values, [25, 75])
    center = np.median(values)
    # Floor the spread at eps so (near-)constant input does not divide by 0.
    scale = max(upper_q - lower_q, eps)
    return (values - center) / scale

def rank_normalize(z):
    """Map the values of a 1-D sequence to their normalised ranks in (0, 1).

    The smallest value receives ``1 / (L + 1)`` and the largest
    ``L / (L + 1)``, where ``L = len(z)``, so the result no longer depends
    on the scale or units of the input (a sigmoid could be substituted if a
    magnitude-aware squashing is preferred).

    Tied values are ranked in order of their position in ``z``.  A stable
    sort is requested explicitly so that this tie-breaking is reproducible;
    NumPy's default sort kind is not stable, which leaves the relative rank
    of equal values unspecified.

    Args:
        z: 1-D array-like of numbers; converted to a float64 array.

    Returns:
        A float64 array of the same length as ``z`` with values strictly
        between 0 and 1 (empty when ``z`` is empty).
    """
    z = np.asarray(z, dtype=np.float64)
    # order[k] is the index of the k-th smallest element; ties keep input order.
    order = np.argsort(z, kind="stable")
    # Arg-sorting a permutation inverts it: ranks[i] is the 0-based rank of z[i].
    ranks = np.argsort(order)
    # Shift to 1..L and divide by L + 1 so neither 0 nor 1 is ever produced.
    return (ranks + 1) / (len(z) + 1)

def variance_weights(features, eps=1e-8):
    """Weight each feature by the inverse of its variance.

    The variance of every entry in ``features`` is computed with ``np.var``;
    a smaller variance yields a larger weight.  The original note describes
    the variance as being taken across layers.

    Args:
        features: Iterable of array-likes, one per metric.
        eps: Added to every variance before inversion so a zero variance
            does not divide by zero.

    Returns:
        A float64 array with one weight per feature, normalised by the sum
        of the inverse variances.
    """
    per_feature_var = np.array(list(map(np.var, features)), dtype=np.float64)
    inverse = 1.0 / (per_feature_var + eps)
    total = inverse.sum()
    return inverse / total

def zscore(x, eps=1e-9):
    """Return the classic z-score ``(x - mean) / std`` of ``x``.

    Args:
        x: Array-like of numbers; converted to a float64 array.
        eps: Threshold on the standard deviation.  When the standard
            deviation is not greater than ``eps`` the data is only centred
            (a divisor of 1.0 is used).

    Returns:
        A float64 array of standardised values.
    """
    values = np.asarray(x, dtype=np.float64)
    center = values.mean()
    scale = values.std()
    # (Near-)constant input: fall back to a unit divisor instead of ~0.
    if not scale > eps:
        scale = 1.0
    return (values - center) / scale

# ---------- Loading sensitivity values ----------
def read_pre_sensitivity(path="", type="taylor"):
    """Load a pre-computed sensitivity vector using the reader for ``type``.

    Supported values of ``type`` and the reader each one dispatches to:

    * ``"taylor"``     -> ``read_taylor_value``
    * ``"ppl"``        -> ``read_ppl_value``
    * ``"similarity"`` -> ``read_similarity_value``

    Args:
        path: Path of the CSV file; forwarded unchanged to the reader.
        type: Name of the sensitivity metric.  The name shadows the builtin
            but is kept because callers pass it by keyword.

    Returns:
        Whatever the selected reader returns.

    Raises:
        ValueError: If ``type`` is not one of the supported names.
    """
    if type == "taylor":
        return read_taylor_value(path)
    if type == "ppl":
        return read_ppl_value(path)
    if type == "similarity":
        return read_similarity_value(path)
    # An unknown name used to fall through and return None, which only
    # surfaced later as an unrelated error at the call site; fail fast instead.
    raise ValueError(
        f"Unsupported sensitivity type {type!r}; "
        "expected 'taylor', 'ppl' or 'similarity'."
    )

def read_taylor_value(path):
    """Read Taylor block scores from a CSV file and rank-normalise them.

    Pipeline: take the ``block_score`` column, drop its first and last
    entries, clip negatives to 0, apply ``log1p`` to compress the heavy
    tail, then run ``robust_z_iqr`` followed by ``rank_normalize``.

    Args:
        path: Path of the CSV file; must contain a ``block_score`` column.

    Returns:
        The array returned by ``rank_normalize`` for the processed scores.
    """
    frame = pd.read_csv(path)
    # NOTE(review): the first and last rows are discarded - presumably the
    # first/last blocks are excluded from scoring; confirm with the caller.
    inner_scores = frame["block_score"].tolist()[1:-1]
    scores = np.asarray(inner_scores, dtype=np.float64)
    # Heavy-tail compression; negatives are clipped so log1p stays defined.
    compressed = np.log1p(np.clip(scores, 0.0, None))
    return rank_normalize(robust_z_iqr(compressed))

def read_ppl_value(path):
    """Read perplexity values from a CSV file and rank-normalise them.

    Pipeline: take the ``ppl_bookcorpus`` column, drop its first and last
    entries, apply ``log1p``, then run ``robust_z_iqr`` followed by
    ``rank_normalize``.

    Args:
        path: Path of the CSV file; must contain a ``ppl_bookcorpus`` column.

    Returns:
        The array returned by ``rank_normalize`` for the processed values.
    """
    frame = pd.read_csv(path)
    # NOTE(review): the first and last rows are discarded - presumably the
    # first/last blocks are excluded from scoring; confirm with the caller.
    inner_values = frame["ppl_bookcorpus"].tolist()[1:-1]
    # log(1 + value); unlike the Taylor reader, no clipping is applied here.
    compressed = np.log1p(inner_values)
    return rank_normalize(robust_z_iqr(compressed))

def read_similarity_value(path):
    """Read similarity block scores from a CSV file and rank-normalise them.

    Pipeline: take the ``block_score`` column, drop its first and last
    entries, then run ``robust_z_iqr`` followed by ``rank_normalize``
    (no log compression is applied for this metric).

    Args:
        path: Path of the CSV file; must contain a ``block_score`` column.

    Returns:
        The array returned by ``rank_normalize`` for the processed scores.
    """
    frame = pd.read_csv(path)
    # NOTE(review): the first and last rows are discarded - presumably the
    # first/last blocks are excluded from scoring; confirm with the caller.
    inner_scores = frame["block_score"].tolist()[1:-1]
    return rank_normalize(robust_z_iqr(inner_scores))

if __name__ == "__main__":
    # Smoke check: load each sensitivity file in turn and print how many
    # entries the corresponding reader returned.
    for csv_path, metric in (
        ("utils/sensitivity/llama2_output/similarity/block_score_all.csv", "similarity"),
        ("utils/sensitivity/llama2_output/taylor/block_score_all.csv", "taylor"),
        ("utils/sensitivity/llama2_output/ppl/all_ppl_unsorted.csv", "ppl"),
    ):
        pre_sensitivity = read_pre_sensitivity(path=csv_path, type=metric)
        print(len(pre_sensitivity))
