

import numpy as np


def compute_eta(T, beta):
    """Return eta = T ** (-(beta + 1) / (4 * beta + 2)).

    Args:
        T: Base of the power; presumably the time horizon (number of
            rounds) -- confirm against the caller.
        beta: Parameter controlling the exponent; presumably a smoothness
            level -- confirm against the caller.

    Returns:
        The reciprocal of ``T`` raised to ``(beta + 1) / (4 * beta + 2)``.
    """
    rate = (beta + 1.0) / (4 * beta + 2.0)
    return 1.0 / (T ** rate)


def compute_t_beta(d, T, beta, delta, C_pilot=10):
    """Return the pilot-phase length as an integer number of rounds.

    The length is ``d * T ** ((beta + 1) / (2 * beta + 1))``, capped at
    ``int(0.8 * T)``, then raised to at least ``10 * d`` and rounded up.

    Note that the ``10 * d`` floor is applied after the cap, so it takes
    precedence: when ``10 * d`` exceeds ``int(0.8 * T)`` the result is
    larger than the cap (and may be larger than ``T``).

    Args:
        d: Multiplier of the power term and of the floor; the caller in
            this file passes the length of ``env.theta_0``.
        T: Base of the power term and of the 80% cap.
        beta: Parameter controlling the exponent.
        delta: Accepted for interface compatibility; not used.
        C_pilot: Accepted for interface compatibility; not used.

    Returns:
        The pilot length as a Python ``int``.
    """
    power = (beta + 1.0) / (2.0 * beta + 1.0)
    rate_based = d * T ** power

    # Upper bound: at most 80% of T (truncated to an integer).
    cap = int(0.8 * T)
    capped = min(rate_based, cap)

    # Lower bound: at least 10 * d; this floor wins over the cap above.
    floor = 10 * d
    return int(np.ceil(max(capped, floor)))


def collect_pilot_data(env, t_beta, p_max, verbose=False):
    """Run ``t_beta`` rounds with uniformly random prices and record them.

    Each round draws a context from ``env.sample_context()``, draws a price
    with ``np.random.uniform(0, p_max)``, and obtains an outcome from
    ``env.generate_outcome(context, price)``.

    Args:
        env: Object providing ``theta_0`` (its length sets the context
            width), ``sample_context()`` and ``generate_outcome(c, p)``.
        t_beta: Number of rounds to run (integer).
        p_max: Upper end of the uniform price range.
        verbose: When true, print a progress line about ten times per run.

    Returns:
        A tuple ``(C, Y, P)``: contexts with shape ``(t_beta, d)``, and the
        outcomes and prices, each with shape ``(t_beta,)``.
    """
    dim = len(env.theta_0)

    contexts = np.zeros((t_beta, dim))
    outcomes = np.zeros(t_beta)
    prices = np.zeros(t_beta)

    # Progress is reported every tenth of the run, and at least every round.
    report_every = max(1, t_beta // 10)

    for t in range(t_beta):
        # Keep this call order (context, price, outcome): it fixes the
        # sequence of draws if env also uses NumPy's global random state.
        context = env.sample_context()
        price = np.random.uniform(0, p_max)
        outcome = env.generate_outcome(context, price)

        contexts[t] = context
        outcomes[t] = outcome
        prices[t] = price

        if verbose and (t + 1) % report_every == 0:
            print(f"  进度: {t+1}/{t_beta}")

    return contexts, outcomes, prices


def solve_ols(C, Y, p_max, ridge=1e-6):
    """Least-squares fit of ``p_max * Y`` on the context matrix ``C``.

    Args:
        C: Design matrix, one row per observation.
        Y: Outcomes, one per row of ``C``.
        p_max: Scale factor applied to ``Y`` to form the regression target.
            Presumably this rescales outcomes observed under uniform prices
            on ``[0, p_max]`` -- confirm against the data-collection code.
        ridge: Accepted for interface compatibility; not used. No
            regularisation is applied.

    Returns:
        The least-squares solution returned by ``np.linalg.lstsq``.
    """
    target = p_max * Y
    # lstsq returns (solution, residuals, rank, singular values); only the
    # solution is needed here.
    return np.linalg.lstsq(C, target, rcond=None)[0]


def compute_pilot_regret(env, C, P):
    """Sum, over paired contexts and prices, optimal minus expected revenue.

    Args:
        env: Object providing ``compute_expected_revenue(c, p)`` and
            ``compute_optimal_price(c)``; the latter must return a pair
            whose second item is the optimal revenue.
        C: Iterable of contexts.
        P: Iterable of prices, paired with ``C`` by position.

    Returns:
        The accumulated difference as a float (``0.0`` when there are no
        pairs).
    """
    cumulative = 0.0

    for context, price in zip(C, P):
        achieved = env.compute_expected_revenue(context, price)
        # Only the revenue at the optimum is needed, not the price itself.
        _best_price, best_revenue = env.compute_optimal_price(context)
        cumulative += best_revenue - achieved

    return cumulative


def run_pilot_estimation(env, T, beta, p_max, delta=0.05, C_pilot=10, verbose=False):
    """Run the pilot phase and return the estimate plus diagnostics.

    Steps: compute ``eta`` and the pilot length, collect pilot data with
    uniformly random prices, fit ``theta_bar`` by least squares, then
    measure the estimation error and the regret of the pilot rounds.

    Args:
        env: Environment object; must expose ``theta_0`` as an array (it is
            copied and subtracted from the estimate) plus whatever the
            data-collection and regret helpers call on it.
        T: Forwarded to ``compute_eta`` and ``compute_t_beta``.
        beta: Forwarded to ``compute_eta`` and ``compute_t_beta``.
        p_max: Upper end of the pilot price range; also used to scale the
            regression target.
        delta: Forwarded to ``compute_t_beta``.
        C_pilot: Forwarded to ``compute_t_beta``.
        verbose: Forwarded to ``collect_pilot_data``.

    Returns:
        A tuple ``(theta_bar, pilot_info)`` where ``pilot_info`` is a dict
        with keys ``eta``, ``t_beta``, ``C``, ``Y``, ``P``, ``regret``,
        ``estimation_error``, ``theta_true`` and ``theta_bar``.
    """
    dim = len(env.theta_0)

    # Schedule quantities.
    eta = compute_eta(T, beta)
    n_pilot = compute_t_beta(dim, T, beta, delta, C_pilot)

    # Data collection followed by the least-squares fit.
    contexts, outcomes, prices = collect_pilot_data(env, n_pilot, p_max, verbose)
    theta_bar = solve_ols(contexts, outcomes, p_max)

    # Diagnostics: distance to env.theta_0 and regret of the pilot rounds.
    error_norm = np.linalg.norm(theta_bar - env.theta_0)
    regret = compute_pilot_regret(env, contexts, prices)

    pilot_info = {
        'eta': eta,
        't_beta': n_pilot,
        'C': contexts,
        'Y': outcomes,
        'P': prices,
        'regret': regret,
        'estimation_error': error_norm,
        # Copies, so later mutation of either array does not alter the record.
        'theta_true': env.theta_0.copy(),
        'theta_bar': theta_bar.copy(),
    }

    return theta_bar, pilot_info



def quick_pilot(env, T, beta, verbose=True):
    """Run the pilot estimation with default settings.

    Calls ``run_pilot_estimation`` using ``env.p_max`` as the price bound,
    ``delta=0.05`` and ``C_pilot=10``.

    Args:
        env: Environment object; must expose ``p_max``.
        T: Forwarded to ``run_pilot_estimation``.
        beta: Forwarded to ``run_pilot_estimation``.
        verbose: Forwarded to ``run_pilot_estimation``; defaults to true.

    Returns:
        Whatever ``run_pilot_estimation`` returns.
    """
    defaults = {'delta': 0.05, 'C_pilot': 10}
    return run_pilot_estimation(env, T, beta, env.p_max, verbose=verbose, **defaults)

