

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np


# Load the 2024 hourly CSV for each region and keep only its
# 'Carbon intensity' column as a NumPy array.
CI_COLUMN = 'Carbon intensity'

# Taiwan
dftw = pd.read_csv('./data/TW_2024_hourly.csv')
tw_data = dftw[CI_COLUMN].values

# United States
dfus = pd.read_csv('./data/US_2024_hourly.csv')
us_data = dfus[CI_COLUMN].values

# South Korea
dfkr = pd.read_csv('./data/KR_2024_hourly.csv')
kr_data = dfkr[CI_COLUMN].values

# Japan
dfjp = pd.read_csv('./data/JP_2024_hourly.csv')
jp_data = dfjp[CI_COLUMN].values

# China
dfcn = pd.read_csv('./data/CN_2024_hourly.csv')
cn_data = dfcn[CI_COLUMN].values


# Split a fixed sample budget across the five regions in the ratio
# TW:US:KR:JP:CN = 4:3:30:30:26.
total_samples = 365 * 24 * 4

_region_weights = {'tw': 4, 'us': 3, 'kr': 30, 'jp': 30, 'cn': 26}
ratio_sum = sum(_region_weights.values())

# int() truncates, so the per-region counts can add up to slightly less
# than total_samples.
_region_counts = {
    region: int(total_samples * weight / ratio_sum)
    for region, weight in _region_weights.items()
}
tw_samples = _region_counts['tw']
us_samples = _region_counts['us']
kr_samples = _region_counts['kr']
jp_samples = _region_counts['jp']
cn_samples = _region_counts['cn']

# Whatever truncation left over is handed to KR so the counts sum exactly
# to total_samples.
remaining_samples = total_samples - sum(_region_counts.values())
kr_samples += remaining_samples


# Draw each region's allotted number of samples, with replacement, from
# that region's carbon-intensity values. No seed is set in this script, so
# the draws (and the resulting figure) differ from run to run.
tw_random = np.random.choice(tw_data, size=tw_samples, replace=True)
us_random = np.random.choice(us_data, size=us_samples, replace=True)
kr_random = np.random.choice(kr_data, size=kr_samples, replace=True)
jp_random = np.random.choice(jp_data, size=jp_samples, replace=True)
# CN must use its own count (cn_samples), not JP's; using jp_samples here
# would over-weight CN and make the mix longer than total_samples.
cn_random = np.random.choice(cn_data, size=cn_samples, replace=True)

# Pool the regional draws into one 1-D array of length total_samples.
combined_array = np.concatenate([tw_random, us_random, kr_random, jp_random, cn_random])

# Shuffle in place so the regions are interleaved rather than stored in
# contiguous runs.
np.random.shuffle(combined_array)

# NOTE: despite the df_ prefix this is a plain NumPy array, not a DataFrame.
df_ssd = combined_array


# Per-vendor PPA values, one entry per data point. Presumably PPA is the
# share of electricity covered by power purchase agreements -- TODO confirm.
# The first two vendors are listed in percent and scaled to fractions;
# the remaining vendors are already given as fractions.
ppa_samsumg = np.array([14.4, 17.7, 93.1, 93.4, 93.4]) / 100
ppa_hynix = np.array([4, 29.6, 30, 29.9]) / 100
ppa_micron = np.array([0.02, 0.04, 0.09])
ppa_wd = np.array([0, 0.1, 0.25, 0.3, 0.4])
ppa_seagate = np.array([0.5])
ppa_kioxia = np.array([0.02, 0.1, 0.3])
ppa_intel = np.array([0.65, 0.8, 0.73, 0.71, 0.71, 0.82, 0.8, 0.93])

# Pool every vendor's values into a single flat array, in vendor order.
_vendor_ppas = [
    ppa_samsumg,
    ppa_hynix,
    ppa_micron,
    ppa_wd,
    ppa_seagate,
    ppa_kioxia,
    ppa_intel,
]
ppa_ssd = np.concatenate(_vendor_ppas)

# Scale every sampled carbon-intensity value by (1 - PPA) for every PPA
# entry: a (samples x PPA entries) grid of products, flattened row by row
# into one long 1-D array.
_uncovered_share = 1 - ppa_ssd
_ci_column = df_ssd.reshape(-1, 1)
_share_row = _uncovered_share.reshape(1, -1)
ci_ppa_0out = (_ci_column * _share_row).flatten()


# Disabled variant kept for reference. It uses `df` and `ppa_tsmc`, which are
# not defined in this script, so it cannot be re-enabled as written.
# ci_ppa_50out = ((df['Carbon intensity']/(1-0.5*df['Carbon-free energy percentage (CFE%)']/100)).values[:, None] * (1-ppa_tsmc)).flatten()






# Overlay the two carbon-intensity distributions as density histograms with
# KDE curves, then export the figure to 'ci_ssd.pdf'.
plt.figure(figsize=(10, 6), dpi=500)

# Styling shared by both histograms.
_hist_style = dict(kde=True, stat='density', alpha=0.2, edgecolor='none')

# Drawn in this order: the PPA-scaled series first, the raw series second.
_series_to_plot = (
    (ci_ppa_0out, 'Market-based CI for SSD', 'b'),
    (df_ssd, 'Location-based CI for SSD', 'r'),
)
for _values, _label, _color in _series_to_plot:
    sns.histplot(_values, label=_label, color=_color, **_hist_style)

# Axis titles.
plt.xlabel('Carbon intensity', fontsize=30)
plt.ylabel('Probability density', fontsize=30)

# Legend and tick labels.
plt.legend(fontsize=25)
plt.xticks(fontsize=25)
plt.yticks(fontsize=25)

# Fit everything inside the canvas before writing the file.
plt.tight_layout()
plt.savefig('ci_ssd.pdf', dpi=500)
