from gensim.models import Word2Vec
import json

# Training corpus: a single "sentence" whose tokens are AST node-type names.
# Token order is kept exactly as listed, since it is the sequence the model
# is trained on.
sentences = [
    [
        "Block",
        "StateVariableDeclaration",
        "EmitStatement",
        "contract",
        "Conditional",
        "FunctionCall",
        "NumberLiteral",
        "ThrowStatement",
        "ExpressionStatement",
        "MemberAccess",
        "ReturnStatement",
        "IndexAccess",
        "ForStatement",
        "StringLiteral",
        "interface",
        "TupleExpression",
        "BooleanLiteral",
        "IfStatement",
        "ModifierDefinition",
        "StructDefinition",
        "EventDefinition",
        "InlineAssemblyStatement",
        "WhileStatement",
        "library",
        "Identifier",
        "UnaryOperation",
        "VariableDeclarationStatement",
        "PragmaDirective",
        "BinaryOperation",
        "ElementaryTypeNameExpression",
        "EnumDefinition",
        "ContractDefinition",
        "FunctionDefinition",
        "UsingForDeclaration",
    ],
]

# Train the Word2Vec model on the corpus above. min_count=1 keeps every token
# in the vocabulary even though each one occurs only once.
model = Word2Vec(
    sentences=sentences,
    vector_size=32,
    window=2,
    min_count=1,
    workers=4,
    epochs=100,
)

# Persist the trained model, then load it back from the same path so the
# vectors exported below come from the on-disk copy.
model_path = "word2vec_model"
model.save(model_path)
model = Word2Vec.load(model_path)

# Grab the vocabulary and the embedding matrix from the loaded model; they
# are paired up positionally below (i-th key with i-th row).
word_vectors = model.wv
words = word_vectors.index_to_key
vectors = word_vectors.vectors

# Components whose absolute value is below this threshold are written as 0.
threshold = 1e-4


def _clean_vector(vector, cutoff):
    """Return *vector* as a list of plain numbers suitable for JSON.

    Each component with absolute value below *cutoff* becomes the integer 0;
    every other component is converted to a Python float and rounded to six
    decimal places.
    """
    cleaned = []
    for component in vector:
        if abs(component) < cutoff:
            cleaned.append(0)
        else:
            cleaned.append(round(float(component), 6))
    return cleaned


# Build the word -> cleaned vector mapping.
word_vectors_dict = {
    word: _clean_vector(vector, threshold)
    for word, vector in zip(words, vectors)
}

# Serialise the word -> vector mapping and write it out as a JSON file.
serialized = json.dumps(word_vectors_dict)
with open("word_vectors.json", "w") as file:
    file.write(serialized)

print("Word vectors have been saved to 'word_vectors.json'")
