import os
import re
import math
import random
import string
import sys
import argparse
import logging

def generate_variable_name(index):
    """
    Map a zero-based index onto a short lowercase identifier.

    The sequence runs ``a`` .. ``z``, then ``aa`` .. ``az``, ``ba`` and so on,
    so a given index always produces the same name no matter which source
    file is being processed. A negative index produces an empty string.
    """
    alphabet = string.ascii_lowercase
    radix = len(alphabet)
    letters = []

    # Emit the least significant letter first. The "- 1" after each division
    # is what makes "aa" follow directly after "z" (there is no zero digit).
    remaining = index
    while remaining >= 0:
        quotient, offset = divmod(remaining, radix)
        letters.append(alphabet[offset])
        remaining = quotient - 1

    letters.reverse()
    return ''.join(letters)

# Words that are never renamed and are never handed out as replacement names:
# C/C++ keywords, preprocessor directive names and common library functions.
# NOTE(review): the "std::..." entries can never equal a single identifier
# token (the identifier patterns below do not match "::"), so they currently
# have no effect; they are kept unchanged from the original list.
_RENAMER_RESERVED_WORDS = frozenset({
    "auto", "break", "case", "char", "const", "continue", "default", "do", "double", "else", "enum", "extern",
    "float", "for", "goto", "if", "inline", "int", "long", "register", "restrict", "return", "short", "signed",
    "sizeof", "static", "struct", "switch", "typedef", "union", "unsigned", "void", "volatile", "while",
    "alignas", "alignof", "and", "and_eq", "asm", "atomic_cancel", "atomic_commit", "atomic_noexcept", "auto",
    "bitand", "bitor", "bool", "catch", "char8_t", "char16_t", "char32_t", "class", "compl", "concept",
    "const_cast", "constexpr", "consteval", "constinit", "co_await", "co_return", "co_yield", "decltype",
    "delete", "dynamic_cast", "explicit", "export", "false", "friend", "goto", "module", "mutable",
    "namespace", "new", "noexcept", "not", "not_eq", "nullptr", "operator", "or", "or_eq", "private",
    "protected", "public", "reflexpr", "register", "reinterpret_cast", "requires", "sizeof", "static_assert",
    "static_cast", "struct", "switch", "synchronized", "template", "this", "throw", "true", "try", "typeid",
    "typename", "union", "using", "virtual", "void", "volatile", "wchar_t", "xor", "xor_eq", "define",
    "elif", "else", "endif", "error", "if", "ifdef", "ifndef", "include", "line", "pragma", "undef",
    "malloc", "calloc", "realloc", "free", "printf", "scanf", "sprintf", "fscanf", "fprintf", "strcpy",
    "strcat", "strlen", "strcmp", "memcpy", "memset", "fopen", "fclose", "fread", "fwrite", "getc", "putc",
    "exit", "abort", "atoi", "atof", "abs", "floor", "ceil", "pow", "sqrt", "log", "log10", "exp", "sin",
    "cos", "tan", "rand", "time", "std::cout", "std::cin", "std::cerr", "std::clog", "std::endl",
    "std::string", "std::vector", "std::list", "std::map", "std::set", "std::unordered_map", "std::unordered_set",
    "std::sort", "std::swap", "std::find", "std::copy", "std::accumulate", "std::min", "std::max",
    "std::array", "std::function", "std::thread", "std::mutex", "std::condition_variable", "std::atomic",
    "std::future", "std::promise", "std::optional", "std::shared_ptr", "std::unique_ptr", "std::weak_ptr",
    "std::make_shared", "std::make_unique", "std::typeid", "std::exception", "std::string_view", "std::move",
    "std::forward",
})

# Any identifier-shaped token standing on word boundaries.
_RENAMER_IDENTIFIER_RE = re.compile(r"\b[a-zA-Z_][a-zA-Z0-9_]*\b")

# An identifier that is NOT directly followed by "(": occurrences followed by
# "(" (calls / function definitions) do not nominate a name for renaming.
_RENAMER_CANDIDATE_RE = re.compile(r"\b([a-zA-Z_][a-zA-Z0-9_]*)\b(?!\s*\()")


def variable_renamer(given_string):
    """
    Rename identifiers in a string of C/C++ code to short generated names.

    Every identifier that occurs at least once without a following ``(`` and
    is not a reserved word is mapped, in order of first appearance, to the
    next free name produced by ``generate_variable_name``. The mapping is then
    applied to every whole-word occurrence outside double-quoted sections.

    Compared with applying one substitution per identifier in sequence, the
    mapping is applied in a single pass, so a generated name that also exists
    in the input (e.g. ``b -> a`` together with ``a -> b``) can no longer be
    renamed a second time and merge two distinct variables. Generated names
    that equal a reserved word (``do``, ``if``, ...) or an identifier that is
    left untouched in the input (such as a function name that only appears in
    front of ``(``) are skipped.

    Args:
        given_string: Source code to transform.

    Returns:
        The source code with identifiers renamed; text between double quotes
        is returned unchanged.

    Note:
        String literals are detected only by splitting on ``"``, so escaped
        quotes and a ``"`` inside a character literal shift what is treated
        as code. Character literals and header names in ``#include <...>``
        are treated as ordinary code and may be renamed.
    """
    # Ordered, de-duplicated identifiers to rename. The whole input (including
    # quoted text) is scanned, matching the original name allocation order.
    candidates = [
        name
        for name in dict.fromkeys(_RENAMER_CANDIDATE_RE.findall(given_string))
        if name not in _RENAMER_RESERVED_WORDS
    ]
    if not candidates:
        return given_string

    # Identifiers that stay as they are; a generated name must not equal one
    # of them or two different entities would end up sharing a name.
    candidate_set = set(candidates)
    kept_tokens = {
        token
        for token in _RENAMER_IDENTIFIER_RE.findall(given_string)
        if token not in candidate_set
    }

    variable_dictionary = {}
    next_index = 0
    for name in candidates:
        new_name = generate_variable_name(next_index)
        next_index += 1
        while new_name in _RENAMER_RESERVED_WORDS or new_name in kept_tokens:
            new_name = generate_variable_name(next_index)
            next_index += 1
        variable_dictionary[name] = new_name
        logging.debug("Renaming %s to %s", name, new_name)

    def substitute(match):
        token = match.group(0)
        return variable_dictionary.get(token, token)

    # Even positions are code, odd positions are the text between a pair of
    # double quotes and are left alone.
    sections = given_string.split('"')
    for position in range(0, len(sections), 2):
        sections[position] = _RENAMER_IDENTIFIER_RE.sub(substitute, sections[position])

    return '"'.join(sections)

def random_string(string_length=8):
    """Return ``string_length`` randomly chosen lowercase ASCII letters as one string."""
    alphabet = string.ascii_lowercase
    picked = []
    for _ in range(string_length):
        picked.append(random.choice(alphabet))
    return ''.join(picked)

def whitespace_remover(a):
    """
    Strip whitespace from code while leaving double-quoted text untouched.

    The input is split on ``"``; pieces at even positions are treated as code
    and pieces at odd positions as the contents of string literals. For each
    code piece all whitespace is removed, then every fragment matched by
    ``code_string`` in the original piece has its original spacing put back:
    fragments starting with ``#`` additionally get a newline appended, and
    fragments containing ``unsigned`` or ``else`` get a trailing space.

    Returns the pieces joined back together with ``"``.
    """
    code_string = r"((\w+\s+)[a-zA-Z_*][|a-zA-Z0-9_]*|#.*|return [a-zA-Z0-9_]*| [[.].]|else)"
    rebuilt = []

    for position, segment in enumerate(re.split('\"', a)):
        if position % 2:
            # Text between a pair of quotes is copied through verbatim.
            rebuilt.append(segment)
            continue

        compact = re.sub(r"[\s]", "", segment)
        for groups in re.findall(code_string, segment):
            spaced = groups[0]
            squeezed = re.sub(r"[\s]", "", spaced)
            if spaced[0] == '#':
                restored = spaced + "\n"
            elif "unsigned" in spaced or "else" in spaced:
                restored = spaced + " "
            else:
                restored = spaced
            # Replaces every occurrence of the squeezed form, in match order.
            compact = compact.replace(squeezed, restored)
        rebuilt.append(compact)

    return "\"".join(rebuilt)

# One left-to-right scan that recognises comments AND quoted literals, so that
# comment markers inside a literal (e.g. "http://host") are not mistaken for
# comments, and a "/*" inside a line comment does not open a block comment.
# Literals may not span a raw newline; a backslash escapes any character.
_COMMENT_OR_LITERAL_RE = re.compile(
    r"//[^\n]*"                  # line comment, terminating newline excluded
    r"|/\*.*?\*/"                # block comment, possibly spanning lines
    r"|'(?:\\.|[^\\'\n])*'"      # character literal
    r'|"(?:\\.|[^\\"\n])*"',     # string literal
    re.DOTALL,
)


def _replace_comment(match):
    """Return the replacement text for one comment-or-literal match."""
    text = match.group(0)
    if text.startswith("//"):
        # The newline is not part of the match, so line structure survives
        # (important after preprocessor directives).
        return ""
    if text.startswith("/*"):
        # A single space keeps the neighbouring tokens separated.
        return " "
    # Quoted literal: keep exactly as written.
    return text


def comment_remover(given_string):
    """
    Remove C/C++ comments from a string of source code.

    Line comments (``// ...``) are deleted up to, but not including, the end
    of the line, so a comment on the last line without a trailing newline is
    removed as well and the following line is never joined onto the current
    one. Block comments (``/* ... */``) are replaced by one space so that
    tokens on either side stay separate. Comment markers that occur inside
    string or character literals are left alone.

    Args:
        given_string: Source code to clean.

    Returns:
        The source code without comments. An unterminated ``/*`` is left in
        place.
    """
    return _COMMENT_OR_LITERAL_RE.sub(_replace_comment, given_string)

def parse_options(argv):
    """
    Parse the command-line arguments of the preprocessing script.

    ``argv`` is the list of argument strings (without the program name).
    Returns the namespace of recognised options: ``paths_file`` and
    ``output_dir`` (both required), plus the boolean switches
    ``remove_comments``, ``rename_identifier`` and ``remove_whitespace``.
    Unrecognised arguments are ignored; no ``-h/--help`` option is registered.
    """
    parser = argparse.ArgumentParser(add_help=False)

    # Required, string-valued options.
    parser.add_argument(
        "-p", "--paths-file",
        dest="paths_file",
        type=str,
        required=True,
        help="Text file containing paths of source files.",
    )
    parser.add_argument(
        "-o", "--output-dir",
        dest="output_dir",
        type=str,
        required=True,
        help="Directory to save output files.",
    )

    # Optional on/off switches; the attribute name is derived from the long
    # flag (dashes become underscores).
    switches = (
        ("-rc", "--remove-comments", "Remove comments in the source code files."),
        ("-ri", "--rename-identifier", "Rename identifiers in the source code files."),
        ("-rw", "--remove-whitespace", "Remove whitespace in the source code files."),
    )
    for short_flag, long_flag, description in switches:
        parser.add_argument(
            short_flag, long_flag,
            required=False,
            action="store_true",
            help=description,
        )

    known_args, _unknown = parser.parse_known_args(argv)
    return known_args

def main(argv=None):
    """
    Preprocess every source file listed in the paths file.

    The paths file (``-p``) contains one source path per line. Each existing
    regular file ending in ``.c`` or ``.i`` is read and, depending on the
    selected switches, passed through ``comment_remover``,
    ``whitespace_remover`` and ``variable_renamer`` (in that order). The
    result is written to ``<output_dir>/<parent directory name>/<file name>``,
    so two sources that share both parent directory name and file name
    overwrite each other.

    Args:
        argv: Argument strings without the program name. Defaults to
            ``sys.argv[1:]`` when omitted or ``None``.
    """
    if argv is None:
        argv = sys.argv[1:]

    options = parse_options(argv)

    if not os.path.exists(options.paths_file):
        logging.error(f"Paths file {options.paths_file} does not exist.")
        return

    with open(options.paths_file, "r") as file:
        file_paths = [line.strip() for line in file]

    for file_path in file_paths:
        if not file_path:
            # Blank lines in the paths file are not worth a warning.
            continue

        # isfile (rather than exists) so that a directory whose name happens
        # to end in ".c" is skipped instead of crashing open() below.
        if not (os.path.isfile(file_path) and file_path.endswith((".c", ".i"))):
            logging.warning(f"Skipping invalid file: {file_path}")
            continue

        logging.info(f"Processing file: {file_path} ...")
        with open(file_path, "r") as src_handler:
            src_data = src_handler.read()

        if options.remove_comments:
            src_data = comment_remover(src_data)
        if options.remove_whitespace:
            src_data = whitespace_remover(src_data)
        if options.rename_identifier:
            src_data = variable_renamer(src_data)

        # Mirror only the immediate parent directory of the source file
        # (e.g. "memsafety-ext3") below the output directory.
        sub_dir = os.path.basename(os.path.dirname(file_path))
        output_subdir = os.path.join(options.output_dir, sub_dir)
        # Creates the output directory itself as well when it is missing.
        os.makedirs(output_subdir, exist_ok=True)

        output_path = os.path.join(output_subdir, os.path.basename(file_path))
        with open(output_path, "w") as output_file:
            output_file.write(src_data)
        logging.info(f"File saved to {output_path}")

if __name__ == "__main__":
    # Script entry point: enable INFO-level logging, then process the
    # command-line arguments (program name excluded).
    cli_arguments = sys.argv[1:]
    logging.basicConfig(level=logging.INFO)
    main(cli_arguments)


