

# ---------------------------------------------------------------------------
# Disabled code. Everything inside the triple-quoted string below is a plain
# string literal, so none of it is executed. Because it is the first statement
# in the file, Python also stores it as the module docstring.
#
# It holds earlier plotting experiments: seaborn box plots of log-errors for
# several sample sizes, and a grouped matplotlib box plot of simulated test
# errors.
#
# NOTE(review): the snippets would not run as-is if re-enabled:
#   * `pd` and `sns` are used but no import for them is visible here;
#   * the first DataFrame pairs the three 5-element log-error lists with
#     "sample size" labels sized from the later 7-element error lists, so the
#     two columns have different lengths;
#   * `means3` / `std_devs3` are only present as commented-out lines, and
#     `means4` / `std_devs4` are never assigned, yet all four are referenced;
#   * `log_err_m10000` is re-assigned raw (non-log) values near the end.
# Confirm whether this material is still needed before reviving it.
# ---------------------------------------------------------------------------
"""
n=200
k=6
sigma=[1,2,3,4,5,6]
w=[1,1,1,1,1,1,1]
A=[1,3,8,10]
r=len(A)

err_m100=[np.float64(0.017924641841648548), np.float64(0.017924641841648437), np.float64(4.5978732330868696e-27), np.float64(4.5978732330868696e-27), np.float64(4.5978732330868696e-27)]
err_m1000=[np.float64(0.0029246418416485342), np.float64(0.0029246418416484232), np.float64(4.5978732330868696e-27), np.float64(4.5978732330868696e-27), np.float64(4.5978732330868696e-27)]
err_m10000=[np.float64(0.0014246418416485884), np.float64(0.001424641841648422), np.float64(4.5978732330868696e-27), np.float64(4.5978732330868696e-27), np.float64(4.5978732330868696e-27)]




log_err_m100=[np.log(i) for i in err_m100]
log_err_m1000=[np.log(i) for i in err_m1000]
log_err_m10000=[np.log(i) for i in err_m10000]



n= 200 
k= 10 
p= 0.5
sigma=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] 
A=[1, 3, 8, 10, 15, 16]

r=len(A)

err_m100= [np.float64(0.013347328598040109), np.float64(0.0010820312047190583), np.float64(0.00705601267190001), np.float64(0.005209284721420736), np.float64(1.4857834613322708e-27), np.float64(1.4857834613322708e-27), np.float64(1.4857834613322708e-27)]
err_m500= [np.float64(0.013347328598040109), np.float64(0.005082031204719062), np.float64(0.00705601267190001), np.float64(0.0012092847214207362), np.float64(1.4857834613322708e-27), np.float64(1.4857834613322708e-27), np.float64(1.4857834613322708e-27)]
err_m1000= [np.float64(0.0033473285980401), np.float64(0.0019179687952809443), np.float64(0.0020560126719000107), np.float64(0.0032092847214207363), np.float64(1.4857834613322708e-27), np.float64(1.4857834613322708e-27), np.float64(1.4857834613322708e-27)]
data = pd.DataFrame({
    "error": log_err_m100 + log_err_m1000 + log_err_m10000,
    "sample size": ["m = 100"] * len(err_m100) + ["m = 500"] * len(err_m500) + ["m = 1000"] * len(err_m1000)
})

# Plot
sns.set(style="whitegrid")
plt.figure(figsize=(8, 6))

# Boxplot with hue = x (to use palette without warning)
sns.boxplot(x="sample size", y="error", hue="sample size", data=data, palette="Set3")
plt.legend([],[], frameon=False)  # hide legend

# Overlay data points
#sns.stripplot(x="sample size", y="error", data=data, color='black', size=6, jitter=True)

#plt.yscale("log")
plt.title("Box Plot of DypChiP Error for Various Sample Sizes", fontsize=14)
plt.ylabel("|Prob(DypChiP) - Empirical Prob|")
plt.xlabel("Sample Size")
plt.tight_layout()




file_path = f"/Users/sh1678/Dropbox/Research/Mallows/topkmallows-choices/Plots-synthetic-data/DyPCHiP_Err_n{n}_k{k}_r_{r}_log.png"

plt.savefig(file_path)



data = pd.DataFrame({
    "error": log_err_m100 + log_err_m1000 + log_err_m10000,
    "sample size": ["m = 100"] * 5 + ["m = 1000"] * 5 + ["m = 10000"] * 5
})

# Compute means
means = data.groupby("sample size")["error"].mean().reset_index()

# Plot boxplot
sns.set(style="whitegrid")
plt.figure(figsize=(8, 6))

ax = sns.boxplot(
    x="sample size",
    y="error",
    hue="sample size",
    data=data,
    palette="Set3",
    showcaps=True,
    boxprops={'facecolor': 'None'},
    medianprops={'color': 'red', 'linewidth': 2},  # Highlight median
    whiskerprops={'linewidth': 1.5},
    flierprops={'marker': 'o', 'markersize': 5}
)

# Plot means
positions = [0, 1, 2]
plt.scatter(
    positions,
    means["error"],
    color='blue',
    zorder=10,
    label='Mean',
    marker='D',
    s=70
)

plt.legend(["Median (red bar)", "Mean (blue diamond)"])
#plt.yscale("log")
plt.title("Box Plot with Highlighted Mean and Median", fontsize=14)
plt.ylabel("log(|Prob(DypChiP) - Empirical Prob|)")
plt.xlabel("Sample Size")
plt.tight_layout()


file_path = f"/Users/sh1678/Dropbox/Research/Mallows/topkmallows-choices/Plots-synthetic-data/DyPCHiP_Err_n{n}_k{k}_r_{r}_mean_median.png"

plt.savefig(file_path)


err_m100= [np.float64(0.013347328598040109), np.float64(0.0010820312047190583), np.float64(0.00705601267190001), np.float64(0.005209284721420736), np.float64(1.4857834613322708e-27), np.float64(1.4857834613322708e-27), np.float64(1.4857834613322708e-27)]
err_m500= [np.float64(0.013347328598040109), np.float64(0.005082031204719062), np.float64(0.00705601267190001), np.float64(0.0012092847214207362), np.float64(1.4857834613322708e-27), np.float64(1.4857834613322708e-27), np.float64(1.4857834613322708e-27)]
err_m1000= [np.float64(0.0033473285980401), np.float64(0.0019179687952809443), np.float64(0.0020560126719000107), np.float64(0.0032092847214207363), np.float64(1.4857834613322708e-27), np.float64(1.4857834613322708e-27), np.float64(1.4857834613322708e-27)]
log_err_m10000=[np.float64(0.0018473285980401544), np.float64(0.002182031204719048), np.float64(0.00014398732809998982), np.float64(0.00019071527857926398), np.float64(1.4857834613322708e-27), np.float64(1.4857834613322708e-27), np.float64(1.4857834613322708e-27)]

data = [err_m100 , err_m1000 , err_m10000 ]




p=0.1
num_cl=2

# Given stats
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.patches import Patch


# First group p=0.1
means1 = [0.07328, 0.1877]
std_devs1 = [0.0301, 0.002]

# Second group p=1.25
means2 = [0.07884482, 0.18838]
std_devs2 = [0.03158, 0.00217]

# Third group p=5

means2 = [0.07884482, 0.18838]
std_devs2 = [0.03158, 0.00217]
#means3 = [0.145641, 0.1892]
#std_devs3 = [0.1887818, 0.0013]

# All settings
all_means = [means1, means2,means3, means4]
all_stds = [std_devs1, std_devs2,std_devs3,std_devs4]
group_labels = ['p=0.1', 'p=1.25']
series_labels = ['top-K MM', 'MNL']
colors = ['skyblue', 'lightgreen']

# Simulate data
np.random.seed(0)
data_groups = [[
    np.random.normal(loc=mean, scale=std, size=100)
    for mean, std in zip(group_means, group_stds)
] for group_means, group_stds in zip(all_means, all_stds)]

# Plot with larger vertical size
fig, ax = plt.subplots(figsize=(6, 4))  # Make plot taller

# x-positions for each group (no offset — fully overlaid)
x_positions = [1, 2]

# Plot each group fully overlaid
for group_idx, group_data in enumerate(data_groups):
    for i in range(2):
        ax.boxplot(group_data[i],
                   positions=[x_positions[group_idx]],
                   widths=0.4,
                   patch_artist=True,
                   boxprops=dict(facecolor=colors[i], alpha=0.5),
                   medianprops=dict(color='red'),
                   showfliers=False)

# Set x-axis
ax.set_xticks(x_positions)
ax.set_xticklabels(group_labels)

# Y-axis limits (adjust to better fit your data)
ax.set_ylim(0.01, 0.20)  # You can tweak this based on your actual error range

# Legend
legend_elements = [Patch(facecolor=colors[i], alpha=0.5, label=series_labels[i]) for i in range(2)]
ax.legend(handles=legend_elements)

# Styling
ax.set_ylabel('test Error ')
ax.set_title('Test error of MNL vs top-k MM model.')
ax.grid(True)
plt.tight_layout()

"""




import matplotlib.pyplot as plt
import numpy as np
from matplotlib.patches import Patch

# Box plot comparing the test error of TopKMM (three parameter settings) with
# the MNL baseline. The figure is written to a PNG and then displayed.
#
# NOTE: the boxes are not drawn from raw per-run errors. Each box shows 100
# values sampled from a normal distribution parameterised by the summary
# mean / standard deviation listed below (seeded, so the figure is
# reproducible).

# Summary statistics of the test error: one (mean, std) pair per setting.
mean1 = 0.05983601
std1 = 0.02249395

mean2 = 0.04455828
std2 = 0.01635027

mean3 = 0.05043584
std3 = 0.02462151

meanMNL = 0.16827143997207789
stdMNL = 0.0027038801497929975

# All settings, in left-to-right plotting order.
all_means = [mean1, mean2, mean3, meanMNL]
all_stds = [std1, std2, std3, stdMNL]
# NOTE(review): the second label carries no beta value — confirm and fill in.
labels = ['p=0.1,beta=0.1', 'p=0.5,beta', 'p=1,beta=0.05', 'MNL']
# One face colour per box: the three TopKMM boxes share a colour, MNL differs.
colors = ['skyblue', 'skyblue', 'skyblue', 'lightgreen']
# The legend only needs one entry per model family.
legend_color = ['skyblue', 'lightgreen']
legend_labels = ['TopKMM', 'MNL']

# Simulate 100 draws per setting from N(mean, std); fixed seed for
# reproducibility.
np.random.seed(0)
data_groups = [
    np.random.normal(loc=mean, scale=std, size=100)
    for mean, std in zip(all_means, all_stds)
]

# Plot
fig, ax = plt.subplots(figsize=(8, 6))

# One x slot per setting: 1, 2, ..., len(all_means).
x_positions = np.arange(1, len(all_means) + 1)

# Draw each setting as its own box so it can get its own face colour.
for i, data in enumerate(data_groups):
    ax.boxplot(
        data,
        positions=[x_positions[i]],
        widths=0.4,
        patch_artist=True,  # required for facecolor to take effect
        boxprops=dict(facecolor=colors[i], alpha=0.5),
        medianprops=dict(color='red'),
        showfliers=False,
    )

# X-axis labels
ax.set_xticks(x_positions)
ax.set_xticklabels(labels)

# Legend: one patch per model family rather than one per box.
legend_elements = [
    Patch(facecolor=face, alpha=0.5, label=text)
    for face, text in zip(legend_color, legend_labels)
]
ax.legend(handles=legend_elements)

# Styling
# NOTE(review): 'erros' in the y-label looks like a typo for 'errors'; the
# string is left unchanged here — confirm before editing the figure text.
ax.set_ylabel('test erros')
ax.set_title('test error of  MNL vs topKMM')
ax.grid(True)
plt.tight_layout()

file_path = "/Users/sh1678/Dropbox/Research/Mallows/topkmallows-choices/Plots-sushi/testerr_one_cluster.png"

# Save BEFORE showing, and through the explicit figure handle: with an
# interactive backend plt.show() blocks until the window is closed, after
# which the figure may be gone and a later plt.savefig() can write a blank
# image.
fig.savefig(file_path)

plt.show()