import json
import random
import os

# Load the complete feature list from all_features.json.
with open('dataset/all_features.json', mode='r', encoding='utf-8') as source:
    data = json.loads(source.read())

# Shuffle in place so that the split below is random.
random.shuffle(data)

# Split 8:2 -- the first 80% of the shuffled records are used for training,
# the remainder for evaluation.
split_index = int(len(data) * 0.8)
train_data, evaluate_data = data[:split_index], data[split_index:]

# Write each partition to its own JSON file (train.json first, then
# evaluate.json). ensure_ascii=False keeps non-ASCII text readable on disk.
for target_path, subset in (
    ('dataset/train.json', train_data),
    ('dataset/evaluate.json', evaluate_data),
):
    with open(target_path, 'w', encoding='utf-8') as sink:
        json.dump(subset, sink, ensure_ascii=False)

print("数据划分完成，已生成 train.json 和 evaluate.json 文件。")

import json
import random


# Function to load data from JSON file
def load_data(filename):
    """Return the object parsed from the UTF-8 encoded JSON file *filename*.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The deserialized JSON content (whatever type the document holds).
    """
    with open(filename, mode='r', encoding='utf-8') as source:
        parsed = json.loads(source.read())
    return parsed


# Function to save data to JSON file
def save_data(data, filename):
    """Serialize *data* as JSON into *filename*, replacing any existing file.

    The file is written as UTF-8 and non-ASCII characters are emitted
    verbatim rather than as ``\\uXXXX`` escapes.

    Args:
        data: JSON-serializable object to store.
        filename: Path of the file to write.
    """
    with open(filename, mode='w', encoding='utf-8') as sink:
        json.dump(data, fp=sink, ensure_ascii=False)


# Load all data from all_features.json
data = load_data('dataset/all_features.json')
print(len(data))

# Partition the records by the integer parsed from contract_name after
# dropping its last four characters (presumably a file extension such as
# ".sol" -- TODO confirm against the dataset).
#   * ids inside [1061, 2918]  -> rest_data, always used for training
#   * ids outside that range   -> filtered_datas, eligible for evaluation
# The id is parsed once per record instead of once per comparison.
filtered_datas = []
rest_data = []
for item in data:
    contract_id = int(item['contract_name'][:-4])
    if 1061 <= contract_id <= 2918:
        rest_data.append(item)
    else:
        filtered_datas.append(item)

random.shuffle(filtered_datas)

# The first 667 shuffled out-of-range records form the evaluation set; the
# remaining out-of-range records join the training set.
evaluate_data_filtered = filtered_datas[:667]
train_data = filtered_datas[667:]

# list.extend() mutates the list in place and returns None, so its result must
# NOT be assigned back (doing so made train_data None and wrote "null" to disk).
train_data.extend(rest_data)

# Save train_data to dataset.json
# NOTE(review): the output paths below are absolute, machine-specific Windows
# paths; they are kept unchanged here to preserve where the files are written.
save_data(train_data, r'E:\2024\experiment_code_clone\total4\all_features\dataset_clone\dataset.json')

# Save evaluate_data_filtered to evaluate_clone.json
save_data(evaluate_data_filtered,
          r'E:\2024\experiment_code_clone\total4\all_features\dataset_clone\evaluate_clone.json')

print("数据划分完成，已生成 dataset.json 和 evaluate_clone.json 文件。")
