# Folder that is scanned for the per-contract graph ``.json`` files.
source_dir = (
    r"D:\study2024\experiment_code_clone\SourceGraphExtractor-main-csg"
    r"\graph_data\node_edge_total"
)
# Root folder under which the generated train/valid datasets are written.
target_dir = (
    r"D:\study2024\experiment_code_clone\GraphFeatureExtractor-main"
    r"\GraphFeatureExtractor-main\data"
)

# Progress trace: show the dataset root before a category suffix is appended.
print(f"0 target_dir is {target_dir}")

import os
import json
import re

# Build one dataset record per graph JSON file whose numeric id (the first run
# of digits in the file name) falls inside the range of the selected category.
#
# Id ranges per category, as recorded in the notes that used to live here as
# commented-out code:
#   \access_control : 0    <= id <= 41
#   \delegatecal    : 42   <= id <= 298
#   \reentrancy     : 299  <= id <= 1058
#   \wild-clean     : 1061 <= id <= 2918
#   \external_call  : 2919 <= id <= 3336
# To process another category, change `dir_name` and the two bounds below.
total_content = []
dir_name = r'\access_control'
id_min, id_max = 0, 41
target_dir = target_dir + dir_name
os.makedirs(target_dir, exist_ok=True)
print("0 target_dir " + target_dir)

# The label depends only on the output folder name, so compute it once instead
# of once per file: "1" for reentrancy, "0" for wild-clean, "2" for the rest.
targets = {"reentrancy": "1", "wild-clean": "0"}.get(
    os.path.basename(target_dir), "2")

for f in os.listdir(source_dir):
    if not f.endswith('.json'):
        continue

    # A file name without digits has no id; skip it rather than crash.
    id_match = re.search(r'\d+', f)
    if id_match is None:
        continue
    number = int(id_match.group())
    if not id_min <= number <= id_max:
        continue

    source_file = os.path.join(source_dir, f)
    try:
        with open(source_file, encoding="utf-8") as t:
            content = json.load(t)
    except json.JSONDecodeError as e:
        print(f"Error loading JSON file '{source_file}': {e}")
        # Skip only the unreadable file. Aborting the loop here would silently
        # drop every file that os.listdir happens to return after it.
        continue

    inner_list = content['node_vec'][0][1]
    if not inner_list:
        # Nothing to split into node features; indexing the last chunk below
        # would fail on an empty list.
        print(f"Skipping '{source_file}': empty node vector")
        continue

    # Cut the flat vector into rows of 64 values.
    chunks = [inner_list[i:i + 64] for i in range(0, len(inner_list), 64)]

    # If the last row is shorter than 64, left-pad it with zeros.
    if len(chunks[-1]) < 64:
        chunks[-1] = [0] * (64 - len(chunks[-1])) + chunks[-1]

    graph_info = content['graph_edge']
    if graph_info:
        print("grAPH_INFO IS ", graph_info)
    else:
        # Placeholder edge used when the file lists no edges.
        # NOTE(review): the meaning of 49/20/50 is not visible here - confirm.
        graph_info = [[49, 20, 50]]

    total_content.append({
        "targets": targets,
        "graph": graph_info,
        # Strip the ".json" suffix and name the record after the .sol file.
        "contract_name": f[:-5] + ".sol",
        "node_features": chunks,
    })
import random

# Split the collected records 70/30 into train and validation sets, then draw
# a matching number of previously generated wild-clean records to mix in.
num_dicts = len(total_content)
print("3 num of reentrancy is " + str(num_dicts))
train_size = int(num_dicts * 0.7)
valid_size = num_dicts - train_size

# Shuffle once and slice. Selecting the validation set with
# `d not in train_data` costs O(n^2) dict comparisons and also drops any
# record that merely compares equal to a training record.
shuffled = random.sample(total_content, num_dicts)
train_data = shuffled[:train_size]
valid_data = shuffled[train_size:]

target_train_file = os.path.join(target_dir, "train.json")
target_valid_file = os.path.join(target_dir, "valid.json")

# Pool every wild-clean record that an earlier run has written to disk.
wild_clean_path = r"D:\study2024\experiment_code_clone\GraphFeatureExtractor-main\GraphFeatureExtractor-main\data\wild-clean"
wild_clean = []
wild_clean_path_train = os.path.join(wild_clean_path, "train.json")
wild_clean_path_valid = os.path.join(wild_clean_path, "valid.json")
if os.path.exists(wild_clean_path_train):
    with open(wild_clean_path_train) as fh:
        wild_clean.extend(json.load(fh))
    # valid.json may be missing even when train.json exists.
    if os.path.exists(wild_clean_path_valid):
        with open(wild_clean_path_valid) as fh:
            wild_clean.extend(json.load(fh))
    print("5 len wild clean is " + str(len(wild_clean)))

wild_clean_train = []
wild_clean_valid = []
if wild_clean:
    # Draw a single sample without replacement and split it, so that no
    # wild-clean record appears in both train and validation. Capping the
    # request at the pool size avoids the ValueError random.sample raises when
    # asked for more items than exist.
    wanted = min(num_dicts, len(wild_clean))
    picked = random.sample(wild_clean, wanted)
    # Equals train_size whenever the pool is large enough; otherwise keeps the
    # same 70/30 proportion on the smaller sample.
    cut = int(wanted * 0.7)
    wild_clean_train = picked[:cut]
    wild_clean_valid = picked[cut:]

def _write_json_array(path, items):
    """Write *items* to *path* as a JSON array with one element per line.

    Elements are separated by a comma followed by a newline. The separator is
    only emitted between elements, so the file is valid JSON even when
    *items* is empty.
    """
    with open(path, 'w', encoding="utf-8") as out:
        out.write('[')
        out.write(',\n'.join(json.dumps(item) for item in items))
        out.write(']')


# Each output file holds the category records followed by the wild-clean
# records. Joining them before writing avoids the dangling or leading comma
# that an unconditional separator between the two groups produces whenever
# one of the groups is empty.
_write_json_array(target_train_file, list(train_data) + list(wild_clean_train))

# Writing valid_data
_write_json_array(target_valid_file, list(valid_data) + list(wild_clean_valid))
