import os
import numpy as np
from scipy.spatial.distance import cosine
import pandas as pd

# Folder that holds the `<name>.npy` vectors to compare, and the list of
# name pairs to compare.
folder_path = '/home/byzeng/project/weights-search/inputweightsxxnew'


def cosine_similarity_percent(vector_a, vector_b):
    """Return the cosine similarity of two arrays, scaled to a percentage.

    Both inputs are flattened to 1-D first, so they only need to contain the
    same total number of elements. The result is ``100 * (1 - d)`` where
    ``d`` is the cosine distance from ``scipy.spatial.distance.cosine``:
    100 for vectors pointing the same way, 0 for orthogonal vectors and
    -100 for opposite vectors.
    """
    return 100 * (1 - cosine(np.ravel(vector_a), np.ravel(vector_b)))


# Each tuple names the two `.npy` files (without extension) to compare.
# Keeping the names side by side avoids the two hand-synchronised parallel
# lists that `zip` would silently truncate if they ever got out of step.
model_pairs = [
    ('falcon-40b-sft-top1-560', 'falcon-40b'),
    ('firefly-qwen-7b', 'Qwen-7B'),
    ('mpt-7b-storywriter', 'mpt-7b'),
    ('LLaMA-2-7B-32K', 'Llama-2-7B-fp16'),
    ('Baichuan-13B-sft', 'Baichuan-13B-Base'),
    ('firefly-internlm-7b', 'internlm-7b'),
]
# Alternative pairings kept from earlier runs:
# model_pairs = [
#     ('falcon-40b-instruct', 'falcon-40b'),
#     ('Qwen-7B-Chat', 'Qwen-7B'),
#     ('mpt-7b-instruct', 'mpt-7b'),
#     ('Llama-2-7B-fp16', 'Llama-2-7b-chat-fp16'),
#     ('Baichuan-13B-Chat', 'Baichuan-13B-Base'),
#     ('internlm-7b', 'internlm-chat-7b'),
# ]
# model_pairs = [
#     ('falcon-40b-instruct', 'falcon-40b'),
#     ('Qwen-7B-Chat', 'Qwen-7B'),
#     ('GPT-NeoXT-Chat-Base-20B', 'gpt-neox-20b'),
#     ('Llama-2-7B-fp16', 'Llama-2-7b-chat-fp16'),
#     ('Baichuan-13B-Chat', 'Baichuan-13B-Base'),
#     ('internlm-7b', 'internlm-chat-7b'),
# ]

for key1, key2 in model_pairs:
    file1_path = os.path.join(folder_path, f'{key1}.npy')
    file2_path = os.path.join(folder_path, f'{key2}.npy')
    try:
        vector1 = np.load(file1_path)
        vector2 = np.load(file2_path)
    except FileNotFoundError as err:
        # One missing file should not abort the remaining comparisons;
        # report the pair and move on.
        print(f'Skipping {key1} vs {key2}: {err}')
        continue
    similarity = cosine_similarity_percent(vector1, vector2)
    print(key1,key2,similarity)
# file_names = ['llama-7b-hf', 'Llama-2-7B-fp16', 'pythia-6.9B','THUDM_chatglm-6b','Qwen-7B', 'THUDM_chatglm2-6b', 'OPT-6.7B',
            #   'internlm-7b', 'open_llama_7b','gpt-j-6b','codegeex2-6b', 'stablelm-base-alpha-7b', 'RedPajama-INCITE-7B-Base','bloom-7b1', 'baichuan-7B']
# file_names = ['gpt2-large','Cerebras-GPT-1.3B','gpt-neo-2.7B','THUDM_chatglm-6b','THUDM_chatglm2-6b','OPT-6.7B','pythia-6.9B','llama-7b-hf','Qwen-7B', 'Llama-2-7B-fp16','RedPajama-INCITE-7B-Base', 'bloom-7b1',  'internlm-7b', 
#               'open_llama_7b','baichuan-7B','pythia-12b','huggyllama_llama-13b','gpt-neox-20b',"opt-30b",'huggyllama_llama-30b','galactica-30b','huggyllama_llama-65b',
#               'galactica-120b',"falcon-180B"]

# file_names = ['pythia-12b','huggyllama_llama-13b']

# file_names = [ 
#               'huggyllama_llama-65b','galactica-120b',"falcon-180B"]

# file_names = ['gpt2-large',
#               'Cerebras-GPT-1.3B','gpt-neo-2.7B']

# file_names = ["opt-30b",'huggyllama_llama-30b',
#               'galactica-30b','gpt-neox-20b',"mpt-30b"]

# file_names = ['llama-7b-hf','Qwen-7B', 'Llama-2-7B-fp16','RedPajama-INCITE-7B-Base', 'THUDM_chatglm-6b','bloom-7b1', 'THUDM_chatglm2-6b', 'pythia-6.9B','internlm-7b', 
#               'open_llama_7b','OPT-6.7B', 'baichuan-7B']

# file_names = ['llama-7b-hf','MiniGPT-4-LLaMA-7B','alpaca-native', 'medalpaca-7b','vicuna-7b-v1.3', 'wizardLM-7B-HF','baize-v2-7b','alpaca-lora-7b', 
#               'chinese-alpaca-7b-merged','koala-7b', 'chinese-llama-7b-merged','beaver-7b-v1.0',"Guanaco","BiLLa-7B-SFT"]
# # file_names = ['gptneox_seed1', 'gptneox_seed2', 
# #               'gptneox_seed3', 'gptneox_seed4']
# # Read the .npy files and compute the cosine similarity
# cosine_matrix = np.zeros((len(file_names), len(file_names)))
# for i, file1 in enumerate(file_names):
#     for j, file2 in enumerate(file_names):
#         if i <= j:
#             file1_path = os.path.join(folder_path, f'{file1}.npy')
#             file2_path = os.path.join(folder_path, f'{file2}.npy')

#             if os.path.exists(file1_path) and os.path.exists(file2_path):
#                 vector1 = np.load(file1_path)
#                 vector2 = np.load(file2_path)
#                 similarity = 100*(1 - cosine(vector1.flatten(), vector2.flatten()))
#                 cosine_matrix[i, j] = similarity
#                 cosine_matrix[j, i] = similarity

# # Create a DataFrame to store the cosine similarities
# df = pd.DataFrame(cosine_matrix, columns=file_names, index=file_names)

# Generate the LaTeX table from the similarity DataFrame `df`.
# The code that builds `df` is currently commented out, so referencing it
# unconditionally raised NameError at the end of every run. Look the name up
# defensively and render the table only when the DataFrame actually exists.
similarity_frame = globals().get("df")
if similarity_frame is None:
    latex_table = None
    print("No similarity DataFrame `df` is defined; skipping LaTeX table generation.")
else:
    latex_table = similarity_frame.to_latex(
        caption='Cos values between different models',
        float_format="%.2f",
        escape=False,
    )
    # Print the LaTeX table
    print(latex_table)
