# ==============================================
# Linear Strategic Classification Baseline
# 
# This script implements a baseline model for strategic classification,
# where agents can strategically manipulate their features to increase
# the chance of receiving a favorable classification outcome.
# 
# The classifier is a logistic regression model, trained on the Adult dataset as an example.
# The classifier is evaluated with the cross-entropy loss.
#
# After each round of strategic manipulation, the script measures:
#   1. KL Divergence: the distributional shift between the manipulated and original agent populations.
#   2. Mean Shift: the L2 distance between feature means before and after manipulation.
#
# The outputs include acceptance rates, KL divergence, and mean shifts over multiple rounds,
# along with visualizations of how these metrics evolve.
# ==============================================



import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import entropy
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer

# =========================
# Data loading (Adult dataset as an example)
# =========================
def load_data(path):
    """Load an Adult-format CSV and return model-ready features and labels.

    The file is read without a header row using the 15 Adult column names.
    Rows containing missing values are dropped, the 'fnlwgt', 'education'
    and 'sex' columns are discarded, and 'income' is mapped to 0 ('<=50K')
    or 1 ('>50K').

    Args:
        path: Location of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        A tuple ``(X_processed, y)``. ``X_processed`` holds the standardized
        numeric columns first, followed by the one-hot encoded categorical
        columns; ``y`` is the array of mapped income labels.
    """
    column_names = [
        'age', 'workclass', 'fnlwgt', 'education', 'education-num',
        'marital-status', 'occupation', 'relationship', 'race', 'sex',
        'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income'
    ]
    # skipinitialspace=True strips the blank that follows each comma, so a
    # missing value reaches the NA matcher as '?' rather than ' ?'. Matching
    # on ' ?' would leave the placeholders in the data as ordinary categories.
    df = pd.read_csv(path, names=column_names, na_values='?', skipinitialspace=True)
    df.dropna(inplace=True)
    df.drop(columns=['fnlwgt', 'education', 'sex'], inplace=True)
    df['income'] = df['income'].map({'<=50K': 0, '>50K': 1})

    X = df.drop(columns=['income'])
    y = df['income'].values

    numeric_features = ['age', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week']
    categorical_features = ['workclass', 'marital-status', 'occupation', 'relationship', 'race', 'native-country']

    # scikit-learn 1.2 renamed OneHotEncoder's `sparse` keyword to
    # `sparse_output` and 1.4 removed the old spelling; try the new name
    # first and fall back so both old and new releases work.
    try:
        encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
    except TypeError:
        encoder = OneHotEncoder(handle_unknown='ignore', sparse=False)

    preprocessor = ColumnTransformer([
        ('num', StandardScaler(), numeric_features),
        ('cat', encoder, categorical_features)
    ])

    X_processed = preprocessor.fit_transform(X)
    return X_processed, y

# =========================
# Distribution shift
# =========================
def get_distribution_stats(X_ref, X_new, num_bins=100):
    """Measure how far ``X_new`` has drifted from ``X_ref``.

    Args:
        X_ref: 2-D array-like of reference samples (rows) by features (columns).
        X_new: 2-D array-like with the same number of columns as ``X_ref``.
        num_bins: Number of histogram bins used per feature.

    Returns:
        A tuple ``(kl_div, mean_shift)``:
        ``kl_div`` is KL(ref || new) estimated from per-feature histograms
        and averaged over the features; ``mean_shift`` is the L2 distance
        between the two column-mean vectors.

    Raises:
        AssertionError: If the two inputs have a different number of columns.
    """
    X_ref = np.asarray(X_ref)
    X_new = np.asarray(X_new)
    # The message below reads "dimension mismatch".
    assert X_ref.shape[1] == X_new.shape[1], "维度不一致"

    kls = []
    for ref_col, new_col in zip(X_ref.T, X_new.T):
        # Both histograms must share the same bin edges; otherwise bin i
        # covers a different interval in each one and the KL value is
        # meaningless (a pure translation would give a divergence of ~0).
        lo = min(ref_col.min(), new_col.min())
        hi = max(ref_col.max(), new_col.max())
        p, _ = np.histogram(ref_col, bins=num_bins, range=(lo, hi), density=True)
        q, _ = np.histogram(new_col, bins=num_bins, range=(lo, hi), density=True)
        # Smooth empty bins so the log ratio stays finite; scipy's entropy
        # renormalizes both vectors before computing the divergence.
        p += 1e-8
        q += 1e-8
        kls.append(entropy(p, q))
    kl_div = np.mean(kls)

    mean_shift = np.linalg.norm(X_new.mean(axis=0) - X_ref.mean(axis=0))
    return kl_div, mean_shift

# =========================
# cross_entropy
# =========================
def compute_cross_entropy(y_true, y_prob, eps=1e-8):
    """Return the mean binary cross-entropy of ``y_prob`` against ``y_true``.

    Probabilities are clipped into ``[eps, 1 - eps]`` first so that neither
    logarithm is evaluated at zero.
    """
    clipped = np.clip(y_prob, eps, 1 - eps)
    positive_term = y_true * np.log(clipped)
    negative_term = (1 - y_true) * np.log(1 - clipped)
    return -np.mean(positive_term + negative_term)

# =========================
# agents
# =========================
class StrategicAgent:
    """Agent that shifts its features to raise its predicted acceptance probability.

    The agent runs projected gradient ascent on the model's class-1
    probability minus a quadratic movement cost,
    ``cost_coeff * delta^T cov_inv_ delta``, where ``delta`` is the
    displacement from the agent's original features.
    """

    def __init__(self, model, cost_coeff=0.1, lr=0.1, max_iter=50):
        """Store the classifier and the optimization hyper-parameters.

        Args:
            model: Object exposing ``predict_proba(X)`` whose column 1 is
                the probability the agent wants to increase.
            cost_coeff: Weight of the quadratic movement cost.
            lr: Step size of each ascent step.
            max_iter: Number of ascent steps per agent.
        """
        self.model = model
        self.cost_coeff = cost_coeff
        self.lr = lr
        self.max_iter = max_iter
        # Inverse covariance used by the cost; filled in by set_cost_matrix().
        self.cov_inv_ = None

    def set_cost_matrix(self, X):
        """Derive the cost matrix from the sample matrix ``X`` (rows are samples).

        The feature covariance gets a small ridge (1e-6 on the diagonal)
        before it is pseudo-inverted.
        """
        self.cov_inv_ = np.linalg.pinv(np.cov(X.T) + 1e-6 * np.eye(X.shape[1]))

    def strategic_perturbation(self, x_orig):
        """Return the manipulated copy of one feature vector.

        Args:
            x_orig: 1-D feature vector; it is not modified.

        Returns:
            A new float array holding the features after ``max_iter``
            projected ascent steps.

        Raises:
            RuntimeError: If ``set_cost_matrix`` has not been called yet.
        """
        if self.cov_inv_ is None:
            raise RuntimeError(
                "cost matrix is not set; call set_cost_matrix(X) before strategic_perturbation"
            )
        # Work in floating point so integer-typed inputs can be updated too.
        x_orig = np.asarray(x_orig, dtype=float)
        x = x_orig.copy()
        # Gradient of cost_coeff * delta^T M delta is (2 * cost_coeff * M) @ delta;
        # the matrix factor does not change between steps, so build it once.
        cost_matrix = 2 * self.cost_coeff * self.cov_inv_
        for _ in range(self.max_iter):
            grad = self._compute_gradient(x)
            cost_grad = cost_matrix @ (x - x_orig)
            x = self._project_features(x + self.lr * (grad - cost_grad), x_orig)
        return x

    def _compute_gradient(self, x):
        """Estimate d P(class 1) / d x at ``x`` by central finite differences.

        All ``2 * len(x)`` probe points are scored with a single
        ``predict_proba`` call instead of one call per probe.
        """
        epsilon = 1e-5
        x = np.asarray(x, dtype=float)
        n_features = x.shape[0]
        if n_features == 0:
            return np.zeros_like(x)
        offsets = epsilon * np.eye(n_features)
        # Rows [0, n) are x with one coordinate raised by epsilon,
        # rows [n, 2n) are x with that coordinate lowered by epsilon.
        probes = np.vstack([x + offsets, x - offsets])
        probs = self.model.predict_proba(probes)[:, 1]
        return (probs[:n_features] - probs[n_features:]) / (2 * epsilon)

    def _project_features(self, x, x_orig):
        """Return a copy of ``x`` pushed back into the feasible set.

        Feature 0 may not fall below its original value and feature 4 is
        clipped to [0, 100].

        NOTE(review): with load_data's column order these indices are
        presumably 'age' and 'hours-per-week'. Those columns are
        standardized there, so the [0, 100] bounds act on z-scores rather
        than raw hours -- confirm that this is intended.
        """
        x_proj = x.copy()
        x_proj[0] = max(x_proj[0], x_orig[0])  # age must not decrease
        x_proj[4] = np.clip(x_proj[4], 0, 100)  # bound on working hours
        return x_proj

# =========================
# Jury
# =========================
def _plot_metric_histories(acceptance_rates, kl_history, shift_history, loss_history):
    """Draw the four per-round metric curves side by side and show the figure."""
    panels = [
        (acceptance_rates, dict(marker='o', label='Acceptance Rate'),
         "Acceptance Rate over Rounds", "Rate"),
        (kl_history, dict(marker='s', color='orange', label='KL Divergence'),
         "KL Divergence over Rounds", "KL Divergence"),
        (shift_history, dict(marker='^', color='green', label='Mean Shift'),
         "Mean Shift over Rounds", "L2 Distance"),
        (loss_history, dict(marker='x', color='red', label='Cross-Entropy Loss'),
         "Jury Cross-Entropy Loss", "Loss"),
    ]

    plt.figure(figsize=(18, 5))
    for position, (series, line_style, title, y_label) in enumerate(panels, start=1):
        plt.subplot(1, 4, position)
        plt.plot(series, **line_style)
        plt.title(title)
        plt.xlabel("Round")
        plt.ylabel(y_label)
        plt.grid(True)

    plt.tight_layout()
    plt.show()


def simulate_acceptance_only(data_path, rounds=20):
    """Simulate repeated strategic manipulation against a fixed classifier.

    A class-balanced logistic regression is fitted once on a 70/30 split of
    the data at ``data_path``. In every round each test agent manipulates
    its current features; the function then records the acceptance rate,
    the KL divergence and mean shift relative to the unmanipulated test
    population, and the cross-entropy of the classifier on the manipulated
    features. Metrics are printed per round and plotted at the end.

    Args:
        data_path: Path to the CSV file handed to ``load_data``.
        rounds: Number of manipulation rounds to simulate.

    Returns:
        A dict with the per-round lists ``'acceptance_rates'``,
        ``'kl_history'``, ``'shift_history'`` and ``'loss_history'``.
    """
    X, y = load_data(data_path)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    model = LogisticRegression(max_iter=1000, class_weight='balanced')
    model.fit(X_train, y_train)

    agent = StrategicAgent(model, cost_coeff=0.2, lr=0.5)
    agent.set_cost_matrix(X_train)

    original_agents = X_test.copy()
    # The manipulated population is carried from round to round. Restarting
    # from the original features each time would make every round an exact
    # repeat (the model never changes), so no metric could evolve.
    # NOTE(review): each round's movement cost is therefore anchored at the
    # features the agent held at the start of that round -- confirm this is
    # the intended dynamic.
    current_agents = original_agents.copy()
    acceptance_rates = []
    kl_history = []
    shift_history = []
    loss_history = []

    for round_idx in range(rounds):
        current_agents = np.array([agent.strategic_perturbation(x) for x in current_agents])
        y_pred = model.predict(current_agents)
        y_prob = model.predict_proba(current_agents)[:, 1]

        accept_rate = y_pred.mean()
        kl_div, mean_shift = get_distribution_stats(original_agents, current_agents)
        ce_loss = compute_cross_entropy(y_test, y_prob)

        acceptance_rates.append(accept_rate)
        kl_history.append(kl_div)
        shift_history.append(mean_shift)
        loss_history.append(ce_loss)

        print(f"Round {round_idx+1}: Acceptance Rate = {accept_rate:.3f}, KL = {kl_div:.4f}, Mean Shift = {mean_shift:.4f}, Loss = {ce_loss:.4f}")

    # Visualization
    _plot_metric_histories(acceptance_rates, kl_history, shift_history, loss_history)

    return {
        'acceptance_rates': acceptance_rates,
        'kl_history': kl_history,
        'shift_history': shift_history,
        'loss_history': loss_history,
    }


# Script entry point: run the simulation on "adult.data", a path resolved
# relative to the current working directory.
if __name__ == "__main__":
    simulate_acceptance_only("adult.data")

