import networkx as nx
from tree_sitter import Language, Parser
from utils.utils import CONSTANTS
import os

def _node_text(node, src_lines):
    """Return the stripped source text of ``node``, read from its start row only."""
    return src_lines[node.start_point[0]][node.start_point[1]:node.end_point[1]].strip()


def _add_cdg_node(CCG, node_id, root_node, span, src_lines, parent):
    """Unconditionally add a graph node for ``root_node`` and return its id.

    ``span`` is the ``(start_row, end_row)`` pair stored on the node. A 'CDG'
    edge from ``parent`` is added only when ``parent`` is not None.
    """
    start_row, end_row = span
    CCG.add_node(node_id, nodeType=root_node.type,
                 startRow=start_row, endRow=end_row,
                 sourceLines=src_lines[start_row:end_row + 1],
                 defSet=set(),
                 useSet=set())
    if parent is not None:
        CCG.add_edge(parent, node_id, 'CDG')
    return node_id


def _attach_cdg_node(CCG, node_id, root_node, span, src_lines, parent):
    """Add a node for ``root_node`` unless ``parent`` already covers ``span``.

    Returns the id that becomes the parent of ``root_node``'s children: the
    unchanged ``parent`` when its row range contains ``span``, otherwise the
    id of the newly created node.
    """
    start_row, end_row = span
    if parent is not None:
        covering = CCG.nodes[parent]
        if covering['startRow'] <= start_row and covering['endRow'] >= end_row:
            return parent
    return _add_cdg_node(CCG, node_id, root_node, span, src_lines, parent)


def _cdg_header_span(root_node):
    """Return the ``(start_row, end_row)`` recorded for statement-like nodes.

    For compound statements only the header rows are used (up to the
    parameters, condition, subject, ...). Returns None for node types that
    are not turned into a graph node through the row-containment rule.
    """
    node_type = root_node.type
    start_row = root_node.start_point[0]

    if node_type in ('import_from_statement', 'import_statement',
                     'future_import_statement', 'type_alias_statement'):
        return start_row, root_node.end_point[0]
    if node_type in ('function_definition', 'async_function_definition'):
        return start_row, root_node.child_by_field_name('parameters').end_point[0]
    if node_type == 'decorated_definition':
        def_node = root_node.child_by_field_name('definition')
        parameter_node = def_node.child_by_field_name('parameters')
        if parameter_node is not None:
            return start_row, parameter_node.end_point[0]
        # No parameter list on the wrapped definition: stop at the row where
        # the definition itself starts.
        return start_row, def_node.start_point[0]
    if node_type == 'class_definition':
        return start_row, root_node.child_by_field_name('name').end_point[0]
    if node_type == 'for_statement':
        return start_row, root_node.child_by_field_name('right').end_point[0]
    if node_type in ('while_statement', 'if_statement', 'elif_clause'):
        return start_row, root_node.child_by_field_name('condition').end_point[0]
    if node_type in ('else_clause', 'except_clause', 'finally_clause'):
        return start_row, start_row
    if node_type == 'with_statement':
        return start_row, root_node.children[1].end_point[0]
    if node_type == 'match_statement':
        return start_row, root_node.child_by_field_name('subject').end_point[0]
    if node_type == 'except_group_clause':
        if len(root_node.children) > 1:
            return start_row, root_node.children[1].end_point[0]
        return start_row, start_row
    if 'statement' in node_type or 'ERROR' in node_type:
        return start_row, root_node.end_point[0]
    return None


def _collect_identifiers(node, use_set, src_lines):
    """Add the text of every identifier in the subtree of ``node`` to ``use_set``."""
    if node.type == 'identifier':
        use_set.add(_node_text(node, src_lines))
    for child in node.children:
        _collect_identifiers(child, use_set, src_lines)


def _record_identifier(child, root_node, CCG, src_lines, parent):
    """File the identifier ``child`` into the defSet or useSet of node ``parent``.

    The decision depends on the ``nodeType`` stored on ``parent`` and, for
    some statement kinds, on where the identifier sits in the syntax tree.
    """
    identifier_name = _node_text(child, src_lines)
    attrs = CCG.nodes[parent]
    owner_type = attrs['nodeType']

    if 'definition' in owner_type:
        attrs['defSet'].add(identifier_name)
    elif owner_type in ('assert_statement', 'raise_statement'):
        attrs['useSet'].add(identifier_name)
    elif owner_type == 'for_statement':
        # Climb to the direct child of the for statement that contains the
        # identifier; the child right after the 'for' keyword is the target.
        p = child
        while p.parent.type != 'for_statement':
            p = p.parent
        prev = p.prev_sibling
        if prev is not None and prev.type == 'for':
            attrs['defSet'].add(identifier_name)
        else:
            attrs['useSet'].add(identifier_name)
    elif owner_type == 'with_statement':
        if child.parent.type == 'as_pattern_target':
            attrs['defSet'].add(identifier_name)
        else:
            attrs['useSet'].add(identifier_name)
    elif owner_type == 'expression_statement':
        # An identifier under the non-last child of an assignment is treated
        # as a definition; anything else is a use.
        p = child
        while p.parent.type != 'assignment' and p.parent.type != 'expression_statement':
            p = p.parent
        if p.parent.type == 'assignment' and p.next_sibling is not None:
            attrs['defSet'].add(identifier_name)
        else:
            attrs['useSet'].add(identifier_name)
    elif 'import' in owner_type:
        # Only identifier children reach this helper, so the former
        # aliased_import / dotted_name / wildcard_import sub-branches could
        # never run and have been dropped.
        if root_node.type == 'future_import_statement':
            # __future__ imports define no names: reset the definitions.
            attrs['defSet'] = set()
        else:
            attrs['defSet'].add(identifier_name)
    elif owner_type in ('lambda', 'function_definition'):
        # Record the identifier as a use unless this node or one of its
        # first-predecessor ancestors already defines it.
        name = child.text.decode()
        current_parent = parent
        found = False
        while current_parent is not None:
            if name in CCG.nodes[current_parent]['defSet']:
                found = True
                break
            # predecessors() yields an iterator, which is always truthy;
            # materialise it before testing for emptiness.
            preds = list(CCG.predecessors(current_parent))
            current_parent = preds[0] if preds else None
        if not found:
            attrs['useSet'].add(name)
    elif owner_type in ('global_statement', 'nonlocal_statement'):
        attrs['defSet'].add(identifier_name)
    else:
        attrs['useSet'].add(identifier_name)


def python_control_dependence_graph(root_node, CCG, src_lines, parent):
    """Recursively add control-dependence nodes and 'CDG' edges to ``CCG``.

    Walks the syntax tree rooted at ``root_node``. Statement-like nodes get a
    graph node holding ``nodeType``, ``startRow``, ``endRow``, ``sourceLines``,
    ``defSet`` and ``useSet`` unless the current ``parent`` node already
    covers their rows. Identifiers met on the way are filed into the
    ``defSet`` / ``useSet`` of the current parent node.

    Args:
        root_node: Syntax-tree node exposing ``type``, ``start_point``,
            ``end_point``, ``children``, ``parent`` and
            ``child_by_field_name`` (presumably a tree-sitter node — confirm
            against the caller).
        CCG: Graph that is mutated in place; must provide ``nodes``,
            ``add_node``, ``add_edge`` and ``predecessors``. Node ids are
            taken from ``len(CCG.nodes)``, which assumes ids are assigned
            contiguously by this function only.
        src_lines: Source text as a sequence of lines indexed by row.
        parent: Id of the graph node that owns ``root_node``, or None when no
            graph node has been created on this path yet.

    Returns:
        None. All results are written into ``CCG``.
    """
    node_id = len(CCG.nodes)
    node_type = root_node.type
    span = _cdg_header_span(root_node)

    if span is not None:
        parent = _attach_cdg_node(CCG, node_id, root_node, span, src_lines, parent)
        # Two statement kinds additionally register the name they introduce.
        if node_type == 'type_alias_statement':
            name_node = root_node.child_by_field_name('left')
        elif node_type == 'async_function_definition':
            name_node = root_node.child_by_field_name('name')
        else:
            name_node = None
        if name_node and name_node.type == 'identifier':
            CCG.nodes[parent]['defSet'].add(_node_text(name_node, src_lines))

    elif node_type == 'type_parameter':
        type_param_name = None
        if root_node.child_by_field_name('name'):
            type_param_name = _node_text(root_node, src_lines)

        # Find the class or function that owns this type parameter.
        owner = root_node.parent
        while owner and owner.type not in ['class_definition', 'function_definition']:
            owner = owner.parent

        if owner and type_param_name:
            for n in CCG.nodes:
                if (CCG.nodes[n]['startRow'] == owner.start_point[0] and
                        CCG.nodes[n]['endRow'] == owner.end_point[0]):
                    # Register on the first matching node, including id 0,
                    # which a plain truthiness test used to skip.
                    CCG.nodes[n]['defSet'].add(type_param_name)
                    break

    elif node_type in ('as_pattern', 'class_pattern'):
        if node_type == 'as_pattern' and parent is not None:
            alias_node = root_node.child_by_field_name('alias')
            if alias_node and alias_node.type == 'identifier':
                CCG.nodes[parent]['defSet'].add(_node_text(alias_node, src_lines))
        # Patterns always get their own node, without the containment check.
        whole_span = (root_node.start_point[0], root_node.end_point[0])
        parent = _add_cdg_node(CCG, node_id, root_node, whole_span, src_lines, parent)

    elif node_type in ('list_comprehension', 'dictionary_comprehension', 'set_comprehension'):
        whole_span = (root_node.start_point[0], root_node.end_point[0])
        parent = _add_cdg_node(CCG, node_id, root_node, whole_span, src_lines, parent)

        # Extract the loop variable(s) of the comprehension.
        for_clause = root_node.child_by_field_name('for_in_clause')
        if for_clause:
            left_node = for_clause.child_by_field_name('left')
            if left_node:
                if left_node.type == 'identifier':
                    targets = [left_node]
                elif left_node.type == 'tuple_pattern':
                    targets = [c for c in left_node.children if c.type == 'identifier']
                else:
                    targets = []
                for target in targets:
                    CCG.nodes[node_id]['defSet'].add(_node_text(target, src_lines))

    elif node_type == 'decorator':
        # Names used inside the decorator expression count as uses.
        expr_node = root_node.child_by_field_name('expression')
        if expr_node and parent is not None:
            _collect_identifiers(expr_node, CCG.nodes[parent]['useSet'], src_lines)

    elif node_type == 'typed_parameter':
        # Names used inside a type annotation count as uses.
        type_node = root_node.child_by_field_name('type')
        if type_node and parent is not None:
            _collect_identifiers(type_node, CCG.nodes[parent]['useSet'], src_lines)

    for child in root_node.children:
        if child.type == 'identifier':
            if parent is None:
                continue
            _record_identifier(child, root_node, CCG, src_lines, parent)
        python_control_dependence_graph(child, CCG, src_lines, parent)

    return


def _enclosing_loop(CCG, v):
    """Return the nearest for/while node above ``v``, or None if there is none.

    Ancestors are followed through the first predecessor of each node.
    """
    preds = list(CCG.predecessors(v))
    while preds:
        p = preds[0]
        if CCG.nodes[p]['nodeType'] in ('for_statement', 'while_statement'):
            return p
        preds = list(CCG.predecessors(p))
    return None


def python_control_flow_graph(CCG):
    """Build control-flow edges from the parent/child structure of ``CCG``.

    Children of a node are ordered by node id; the smallest is its "first
    child" and consecutive children are "next siblings". Edges are derived
    from those two relations plus per-statement rules (loops, branches,
    try/except/finally, match, break/continue).

    Args:
        CCG: Graph whose nodes carry a ``nodeType`` attribute and whose
            edges link a parent node to its children. It is only read here.

    Returns:
        A ``(CFG, edge_list)`` tuple: ``CFG`` is a new ``nx.MultiDiGraph``
        containing every node id of ``CCG`` and the derived edges;
        ``edge_list`` is the list of ``(source, target, 'CFG')`` tuples in
        the order they were generated. The list may contain repeated
        entries.
    """
    # Initialise the control-flow graph.
    CFG = nx.MultiDiGraph()
    # Next-sibling and first-child lookup tables.
    next_sibling = dict()
    first_children = dict()

    # Nodes without predecessors are top-level; chain them in id order.
    start_nodes = sorted(v for v in CCG.nodes if len(list(CCG.predecessors(v))) == 0)
    for v, u in zip(start_nodes, start_nodes[1:]):
        next_sibling[v] = u
    if start_nodes:  # an empty graph has no last top-level node
        next_sibling[start_nodes[-1]] = None

    # Record the first child of every node and the sibling order of its children.
    for v in CCG.nodes:
        children = sorted(CCG.neighbors(v))
        if not children:
            first_children[v] = None
            continue
        parent_is_if = CCG.nodes[v]['nodeType'] == 'if_statement'
        for u, w in zip(children, children[1:]):
            # Under an if statement, the child right before a clause gets no
            # next sibling, so no sequential edge is created into the clause.
            if parent_is_if and 'clause' in CCG.nodes[w]['nodeType']:
                next_sibling[u] = None
            else:
                next_sibling[u] = w
        next_sibling[children[-1]] = None
        first_children[v] = children[0]

    edge_list = []

    # Build the control-flow edges.
    for v in CCG.nodes:
        node_type = CCG.nodes[v]['nodeType']
        first_child = first_children.get(v)

        # Block start: every node flows into its first child.
        if first_child is not None:
            edge_list.append((v, first_child, 'CFG'))

        # Statement-specific edges. The shared tail below still runs for all
        # node types except a break/continue that has no enclosing loop.
        if node_type in ('return_statement', 'raise_statement'):
            pass
        elif node_type in ('break_statement', 'continue_statement'):
            loop = _enclosing_loop(CCG, v)
            if loop is None:
                # Not inside any loop (e.g. partial code): nothing to link.
                continue
            u = next_sibling[loop] if node_type == 'break_statement' else loop
            if u is not None:
                edge_list.append((v, u, 'CFG'))
        elif node_type == 'for_statement':
            if first_child is not None:
                edge_list.append((v, first_child, 'CFG'))
        elif node_type in ('while_statement', 'match_statement'):
            if first_child is not None:
                edge_list.append((v, first_child, 'CFG'))
            # Condition false / no case matched: continue with the next sibling.
            u = next_sibling[v]
            if u is not None:
                edge_list.append((v, u, 'CFG'))
        elif node_type == 'if_statement':
            # The original test was ['if_statement' or 'try_statement'], which
            # only ever matched 'if_statement'; try statements are handled by
            # their own branch below.
            if first_child is not None:
                edge_list.append((v, first_child, 'CFG'))
            for u in CCG.neighbors(v):
                if 'clause' in CCG.nodes[u]['nodeType']:
                    edge_list.append((v, u, 'CFG'))
        elif node_type == 'try_statement':
            except_nodes = []
            finally_node = None
            for u in CCG.neighbors(v):
                clause_type = CCG.nodes[u]['nodeType']
                if clause_type in ('except_clause', 'except_group_clause'):
                    except_nodes.append(u)
                elif clause_type == 'finally_clause':
                    finally_node = u

            # try -> first except handler.
            if except_nodes:
                edge_list.append((v, except_nodes[0], 'CFG'))

            # Chain the except handlers one after another.
            for handler, following in zip(except_nodes, except_nodes[1:]):
                edge_list.append((handler, following, 'CFG'))

            if finally_node is not None:
                # The last handler (or the try itself) flows into finally.
                if except_nodes:
                    edge_list.append((except_nodes[-1], finally_node, 'CFG'))
                else:
                    edge_list.append((v, finally_node, 'CFG'))

                # finally flows into whatever follows the try statement.
                if next_sibling[v] is not None:
                    edge_list.append((finally_node, next_sibling[v], 'CFG'))
        elif node_type == 'with_statement':
            # Model the implicit __exit__ call: each context-manager item
            # flows to the statement after the with block.
            manager_nodes = []
            for u in CCG.neighbors(v):
                if CCG.nodes[u]['nodeType'] == 'with_clause':
                    for item in CCG.neighbors(u):
                        if CCG.nodes[item]['nodeType'] == 'with_item':
                            manager_nodes.append(item)

            exit_target = next_sibling[v]
            if exit_target is not None:  # never emit an edge to None
                for manager in manager_nodes:
                    edge_list.append((manager, exit_target, 'CFG'))
        elif 'clause' in node_type:
            if first_child is not None:
                edge_list.append((v, first_child, 'CFG'))

        # Shared tail: sequential flow to the next sibling, or, for the last
        # node of a block, up to an enclosing loop / past an enclosing branch.
        u = next_sibling[v]
        if u is not None:
            edge_list.append((v, u, 'CFG'))
        else:
            p = v
            while len(list(CCG.predecessors(p))) != 0:
                p = list(CCG.predecessors(p))[0]
                ancestor_type = CCG.nodes[p]['nodeType']
                if ancestor_type in ('while_statement', 'for_statement'):
                    # End of a loop body: jump back to the loop header.
                    edge_list.append((v, p, 'CFG'))
                    break
                if ancestor_type in ('try_statement', 'if_statement'):
                    if next_sibling[p] is not None:
                        edge_list.append((v, next_sibling[p], 'CFG'))
                        break

    CFG.add_edges_from(edge_list)
    # Make sure nodes without any control-flow edge are still present.
    for v in CCG.nodes:
        if v not in CFG.nodes:
            CFG.add_node(v)
    return CFG, edge_list


def _cdg_parent(CCG, node):
    """Return the source of the 'CDG'-keyed edge entering ``node``, or None."""
    for source, _target, key in CCG.in_edges(node, keys=True):
        if key == 'CDG':
            return source
    return None


def _enclosing_definitions(CCG, node):
    """Collect the CDG ancestors of ``node`` whose nodeType contains 'definition'."""
    enclosing = set()
    ancestor = _cdg_parent(CCG, node)
    while ancestor is not None:
        if 'definition' in CCG.nodes[ancestor]['nodeType']:
            enclosing.add(ancestor)
        ancestor = _cdg_parent(CCG, ancestor)
    return enclosing


def _has_def_clear_path(CCG, paths, var):
    """Tell whether at least one path has no intermediate node defining ``var``."""
    return any(
        all(var not in CCG.nodes[step]['defSet'] for step in path[1:-1])
        for path in paths
    )


def python_data_dependence_graph(CFG, CCG):
    """Add data-dependence ('DDG') edges to ``CCG`` in place.

    Two kinds of edges are added, both keyed 'DDG':

    * For a node ``v`` of type 'lambda' or 'function_definition', an edge
      ``d -> v`` from every node ``d`` that defines one of ``v``'s free
      variables (``useSet - defSet``) and can reach ``v`` in ``CFG``.  For a
      'typed_parameter' node the same is done for every name in its
      ``useSet``.
    * For every ordered pair ``(v, u)``, an edge ``v -> u`` when ``v`` defines
      a name that ``u`` uses and at least one shortest ``CFG`` path from ``v``
      to ``u`` has no intermediate node redefining that name.

    Nodes whose type contains 'import' are never used as ``v``.  A node whose
    type contains 'definition' is only paired with nodes nested inside it
    along 'CDG' edges.

    Args:
        CFG: control-flow graph containing every node of ``CCG``.
        CCG: graph whose nodes carry 'nodeType', 'defSet' and 'useSet' and
            whose control-dependence edges use the key 'CDG' (the
            three-positional ``add_edge`` calls imply a networkx multigraph).

    Returns:
        None. ``CCG`` is mutated.
    """
    # Ancestry depends only on the CDG edges, so compute it once per node and
    # before any DDG edge exists.  Walking plain predecessors instead would
    # start following DDG edges added below and can cycle forever when a loop
    # header receives a DDG edge from its own body.
    enclosing_defs = {
        node: _enclosing_definitions(CCG, node) for node in CCG.nodes
    }

    for v in CCG.nodes:
        v_attrs = CCG.nodes[v]
        v_type = v_attrs['nodeType']
        if 'import' in v_type:
            continue

        # Names that v needs from elsewhere.  This depends on v alone, so it
        # is handled outside the pair loop; it must not rebind the pair
        # loop's variable either.
        if v_type in ('lambda', 'function_definition'):
            needed = v_attrs['useSet'] - v_attrs['defSet']
        elif v_type == 'typed_parameter':
            needed = set(v_attrs['useSet'])
        else:
            needed = set()
        if needed:
            for definer in CCG.nodes:
                if (needed & CCG.nodes[definer]['defSet']
                        and nx.has_path(CFG, definer, v)):
                    CCG.add_edge(definer, v, 'DDG')

        v_is_definition = 'definition' in v_type
        for u in CCG.nodes:
            if u == v:
                continue
            # A definition node only feeds the nodes nested inside it.
            if v_is_definition and v not in enclosing_defs[u]:
                continue

            shared = v_attrs['defSet'] & CCG.nodes[u]['useSet']
            if not shared or not nx.has_path(CFG, v, u):
                continue

            # The dependence holds if some shared name survives along some
            # shortest path; each path is judged independently.
            paths = list(nx.all_shortest_paths(CFG, source=v, target=u))
            if any(_has_def_clear_path(CCG, paths, var) for var in shared):
                CCG.add_edge(v, u, 'DDG')

    return

# Original version
# def python_control_dependence_graph(root_node, CCG, src_lines, parent):
#     node_id = len(CCG.nodes)
#
#     if root_node.type in ['import_from_statement', 'import_statement']:
#         start_row = root_node.start_point[0]
#         end_row = root_node.end_point[0]
#
#         if parent is None:
#             CCG.add_node(node_id, nodeType=root_node.type,
#                          startRow=start_row, endRow=end_row,
#                          sourceLines=src_lines[start_row:end_row + 1],
#                          defSet=set(),
#                          useSet=set())
#             parent = node_id
#         else:
#             if CCG.nodes[parent]['startRow'] <= start_row and CCG.nodes[parent]['endRow'] >= end_row:
#                 pass
#             else:
#                 CCG.add_node(node_id, nodeType=root_node.type,
#                              startRow=start_row, endRow=end_row,
#                              sourceLines=src_lines[start_row:end_row + 1],
#                              defSet=set(),
#                              useSet=set())
#                 CCG.add_edge(parent, node_id, 'CDG')
#                 parent = node_id
#     elif root_node.type in ['class_definition', 'decorated_definition', 'function_definition']:
#         if root_node.type == 'function_definition':
#             start_row = root_node.start_point[0]
#             end_row = root_node.child_by_field_name('parameters').end_point[0]
#         elif root_node.type == 'decorated_definition':
#             def_node = root_node.child_by_field_name('definition')
#             start_row = root_node.start_point[0]
#             parameter_node = def_node.child_by_field_name('parameters')
#             if parameter_node is not None:
#                 end_row = parameter_node.end_point[0]
#             else:
#                 end_row = def_node.start_point[0]
#         elif root_node.type == 'class_definition':
#             start_row = root_node.start_point[0]
#             end_row = root_node.child_by_field_name('name').end_point[0]
#         if parent is None:
#             CCG.add_node(node_id, nodeType=root_node.type,
#                          startRow=start_row, endRow=end_row,
#                          sourceLines=src_lines[start_row:end_row + 1],
#                          defSet=set(),
#                          useSet=set())
#             parent = node_id
#         else:
#             if CCG.nodes[parent]['startRow'] <= start_row and CCG.nodes[parent]['endRow'] >= end_row:
#                 pass
#             else:
#                 CCG.add_node(node_id, nodeType=root_node.type,
#                              startRow=start_row, endRow=end_row,
#                              sourceLines=src_lines[start_row:end_row + 1],
#                              defSet=set(),
#                              useSet=set())
#                 CCG.add_edge(parent, node_id, 'CDG')
#                 parent = node_id
#     elif root_node.type in ['while_statement', 'for_statement']:
#         if root_node.type == 'for_statement':
#             start_row = root_node.start_point[0]
#             end_row = root_node.child_by_field_name('right').end_point[0]
#         if root_node.type == 'while_statement':
#             start_row = root_node.start_point[0]
#             end_row = root_node.child_by_field_name('condition').end_point[0]
#         if parent is None:
#             CCG.add_node(node_id, nodeType=root_node.type,
#                          startRow=start_row, endRow=end_row,
#                          sourceLines=src_lines[start_row:end_row + 1],
#                          defSet=set(),
#                          useSet=set())
#             parent = node_id
#         else:
#             if CCG.nodes[parent]['startRow'] <= start_row and CCG.nodes[parent]['endRow'] >= end_row:
#                 pass
#             else:
#                 CCG.add_node(node_id, nodeType=root_node.type,
#                              startRow=start_row, endRow=end_row,
#                              sourceLines=src_lines[start_row:end_row + 1],
#                              defSet=set(),
#                              useSet=set())
#                 CCG.add_edge(parent, node_id, 'CDG')
#                 parent = node_id
#     elif root_node.type == 'if_statement':
#         start_row = root_node.start_point[0]
#         end_row = root_node.child_by_field_name('condition').end_point[0]
#         if parent is None:
#             CCG.add_node(node_id, nodeType=root_node.type,
#                          startRow=start_row, endRow=end_row,
#                          sourceLines=src_lines[start_row:end_row + 1],
#                          defSet=set(),
#                          useSet=set())
#             parent = node_id
#         else:
#             if CCG.nodes[parent]['startRow'] <= start_row and CCG.nodes[parent]['endRow'] >= end_row:
#                 pass
#             else:
#                 CCG.add_node(node_id, nodeType=root_node.type,
#                              startRow=start_row, endRow=end_row,
#                              sourceLines=src_lines[start_row:end_row + 1],
#                              defSet=set(),
#                              useSet=set())
#                 CCG.add_edge(parent, node_id, 'CDG')
#                 parent = node_id
#     elif root_node.type == 'elif_clause':
#         start_row = root_node.start_point[0]
#         end_row = root_node.child_by_field_name('condition').end_point[0]
#         if parent is None:
#             CCG.add_node(node_id, nodeType=root_node.type,
#                          startRow=start_row, endRow=end_row,
#                          sourceLines=src_lines[start_row:end_row + 1],
#                          defSet=set(),
#                          useSet=set())
#             parent = node_id
#         else:
#             if CCG.nodes[parent]['startRow'] <= start_row and CCG.nodes[parent]['endRow'] >= end_row:
#                 pass
#             else:
#                 CCG.add_node(node_id, nodeType=root_node.type,
#                              startRow=start_row, endRow=end_row,
#                              sourceLines=src_lines[start_row:end_row + 1],
#                              defSet=set(),
#                              useSet=set())
#                 CCG.add_edge(parent, node_id, 'CDG')
#                 parent = node_id
#     elif root_node.type in ['else_clause', 'except_clause']:
#         start_row = root_node.start_point[0]
#         end_row = root_node.start_point[0]
#         if parent is None:
#             CCG.add_node(node_id, nodeType=root_node.type,
#                          startRow=start_row, endRow=end_row,
#                          sourceLines=src_lines[start_row:end_row + 1],
#                          defSet=set(),
#                          useSet=set())
#             parent = node_id
#         else:
#             if CCG.nodes[parent]['startRow'] <= start_row and CCG.nodes[parent]['endRow'] >= end_row:
#                 pass
#             else:
#                 CCG.add_node(node_id, nodeType=root_node.type,
#                              startRow=start_row, endRow=end_row,
#                              sourceLines=src_lines[start_row:end_row + 1],
#                              defSet=set(),
#                              useSet=set())
#                 CCG.add_edge(parent, node_id, 'CDG')
#                 parent = node_id
#     elif root_node.type == 'with_statement':
#         start_row = root_node.start_point[0]
#         end_row = root_node.children[1].end_point[0]
#         if parent is None:
#             CCG.add_node(node_id, nodeType=root_node.type,
#                          startRow=start_row, endRow=end_row,
#                          sourceLines=src_lines[start_row:end_row + 1],
#                          defSet=set(),
#                          useSet=set())
#             parent = node_id
#         else:
#             if CCG.nodes[parent]['startRow'] <= start_row and CCG.nodes[parent]['endRow'] >= end_row:
#                 pass
#             else:
#                 CCG.add_node(node_id, nodeType=root_node.type,
#                              startRow=start_row, endRow=end_row,
#                              sourceLines=src_lines[start_row:end_row + 1],
#                              defSet=set(),
#                              useSet=set())
#                 CCG.add_edge(parent, node_id, 'CDG')
#                 parent = node_id
#     elif 'statement' in root_node.type or 'ERROR' in root_node.type:
#         start_row = root_node.start_point[0]
#         end_row = root_node.end_point[0]
#         if parent is None:
#             CCG.add_node(node_id, nodeType=root_node.type,
#                          startRow=start_row, endRow=end_row,
#                          sourceLines=src_lines[start_row:end_row + 1],
#                          defSet=set(),
#                          useSet=set())
#             parent = node_id
#         else:
#             if CCG.nodes[parent]['startRow'] <= start_row and CCG.nodes[parent]['endRow'] >= end_row:
#                 pass
#             else:
#                 CCG.add_node(node_id, nodeType=root_node.type,
#                              startRow=start_row, endRow=end_row,
#                              sourceLines=src_lines[start_row:end_row + 1],
#                              defSet=set(),
#                              useSet=set())
#                 CCG.add_edge(parent, node_id, 'CDG')
#                 parent = node_id
#
#     for child in root_node.children:
#         if child.type == 'identifier':
#             row = child.start_point[0]
#             col_start = child.start_point[1]
#             col_end = child.end_point[1]
#             identifier_name = src_lines[row][col_start:col_end].strip()
#             if parent is None:
#                 continue
#             if 'definition' in CCG.nodes[parent]['nodeType']:
#                 CCG.nodes[parent]['defSet'].add(identifier_name)
#             elif CCG.nodes[parent]['nodeType'] == 'for_statement':
#                 p = child
#                 while p.parent.type != 'for_statement':
#                     p = p.parent
#                 if p.parent.type == 'for_statement' and p.prev_sibling.type == 'for':
#                     CCG.nodes[parent]['defSet'].add(identifier_name)
#                 else:
#                     CCG.nodes[parent]['useSet'].add(identifier_name)
#             elif CCG.nodes[parent]['nodeType'] == 'with_statement':
#                 if child.parent.type == 'as_pattern_target':
#                     CCG.nodes[parent]['defSet'].add(identifier_name)
#                 else:
#                     CCG.nodes[parent]['useSet'].add(identifier_name)
#             elif CCG.nodes[parent]['nodeType'] == 'expression_statement':
#                 p = child
#                 while p.parent.type != 'assignment' and p.parent.type != 'expression_statement':
#                     p = p.parent
#                 if p.parent.type == 'assignment' and p.next_sibling is not None:
#                     CCG.nodes[parent]['defSet'].add(identifier_name)
#                 else:
#                     CCG.nodes[parent]['useSet'].add(identifier_name)
#             elif 'import' in CCG.nodes[parent]['nodeType']:
#                 CCG.nodes[parent]['defSet'].add(identifier_name)
#             elif CCG.nodes[parent]['nodeType'] in ['global_statement', 'nonlocal_statement']:
#                 CCG.nodes[parent]['defSet'].add(identifier_name)
#             else:
#                 CCG.nodes[parent]['useSet'].add(identifier_name)
#         python_control_dependence_graph(child, CCG, src_lines, parent)
#
#     return
#
#
# def python_control_flow_graph(CCG):
#     CFG = nx.MultiDiGraph()
#
#     next_sibling = dict()
#     first_children = dict()
#
#     start_nodes = []
#     for v in CCG.nodes:
#         if len(list(CCG.predecessors(v))) == 0:
#             start_nodes.append(v)
#     start_nodes.sort()
#     for i in range(0, len(start_nodes) - 1):
#         v = start_nodes[i]
#         u = start_nodes[i + 1]
#         next_sibling[v] = u
#     next_sibling[start_nodes[-1]] = None
#
#     for v in CCG.nodes:
#         children = list(CCG.neighbors(v))
#         if len(children) != 0:
#             children.sort()
#             for i in range(0, len(children) - 1):
#                 u = children[i]
#                 w = children[i + 1]
#                 if CCG.nodes[v]['nodeType'] == 'if_statement' and 'clause' in CCG.nodes[w]['nodeType']:
#                     next_sibling[u] = None
#                 else:
#                     next_sibling[u] = w
#             next_sibling[children[-1]] = None
#             first_children[v] = children[0]
#         else:
#             first_children[v] = None
#
#     edge_list = []
#
#     for v in CCG.nodes:
#         # block start control flow
#         if v in first_children.keys():
#             u = first_children[v]
#             if u is not None:
#                 edge_list.append((v, u, 'CFG'))
#         # block end control flow
#         if CCG.nodes[v]['nodeType'] in ['return_statement', 'raise_statement']:
#             pass
#         elif CCG.nodes[v]['nodeType'] in ['break_statement', 'continue_statement']:
#             u = None
#             p = list(CCG.predecessors(v))[0]
#             while CCG.nodes[p]['nodeType'] not in ['for_statement', 'while_statement']:
#                 p = list(CCG.predecessors(p))[0]
#             if CCG.nodes[v]['nodeType'] == 'break_statement':
#                 u = next_sibling[p]
#             if CCG.nodes[v]['nodeType'] == 'continue_statement':
#                 u = p
#             if u is not None:
#                 edge_list.append((v, u, 'CFG'))
#         elif CCG.nodes[v]['nodeType'] == 'for_statement':
#             u = first_children[v]
#             if u is not None:
#                 edge_list.append((v, u, 'CFG'))
#         elif CCG.nodes[v]['nodeType'] == 'while_statement':
#             u = first_children[v]
#             if u is not None:
#                 edge_list.append((v, u, 'CFG'))
#             u = next_sibling[v]
#             if u is not None:
#                 edge_list.append((v, u, 'CFG'))
#         elif CCG.nodes[v]['nodeType'] in ['if_statement' or 'try_statement']:
#             u = first_children[v]
#             if u is not None:
#                 edge_list.append((v, u, 'CFG'))
#             for u in CCG.neighbors(v):
#                 if 'clause' in CCG.nodes[u]['nodeType']:
#                     edge_list.append((v, u, 'CFG'))
#         elif 'clause' in CCG.nodes[v]['nodeType']:
#             u = first_children[v]
#             if u is not None:
#                 edge_list.append((v, u, 'CFG'))
#
#         u = next_sibling[v]
#         if u is None:
#             p = v
#             while len(list(CCG.predecessors(p))) != 0:
#                 p = list(CCG.predecessors(p))[0]
#                 if CCG.nodes[p]['nodeType'] == 'while_statement':
#                         edge_list.append((v, p, 'CFG'))
#                         break
#                 if CCG.nodes[p]['nodeType'] == 'for_statement':
#                     edge_list.append((v, p, 'CFG'))
#                     break
#                 if CCG.nodes[p]['nodeType'] in ['try_statement', 'if_statement']:
#                     if next_sibling[p] is not None:
#                         edge_list.append((v, next_sibling[p], 'CFG'))
#                         break
#         if u is not None:
#             edge_list.append((v, u, 'CFG'))
#     CFG.add_edges_from(edge_list)
#     for v in CCG.nodes:
#         if v not in CFG.nodes:
#             CFG.add_node(v)
#     return CFG, edge_list
#
#
# def python_data_dependence_graph(CFG, CCG):
#     for v in CCG.nodes:
#         for u in CCG.nodes:
#             if v == u or 'import' in CCG.nodes[v]['nodeType']:
#                 continue
#             # find the definition of u
#             u_def = u
#             u_def_set = set()
#             while len(list(CCG.predecessors(u_def))) != 0:
#                 u_def = list(CCG.predecessors(u_def))[0]
#                 if 'definition' in CCG.nodes[u_def]['nodeType']:
#                     u_def_set.add(u_def)
#             if 'definition' in CCG.nodes[v]['nodeType'] and v not in u_def_set:
#                 continue
#             if len(CCG.nodes[v]['defSet'] & CCG.nodes[u]['useSet']) != 0 and nx.has_path(CFG, v, u):
#                 has_path = False
#                 paths = list(nx.all_shortest_paths(CFG, source=v, target=u))
#                 variables = CCG.nodes[v]['defSet'] & CCG.nodes[u]['useSet']
#                 for var in variables:
#                     has_def = False
#                     for path in paths:
#                         for p in path[1:-1]:
#                             if var in CCG.nodes[p]['defSet']:
#                                 has_def = True
#                                 break
#                         if not has_def:
#                             has_path = True
#                             break
#                     if has_path:
#                         break
#                 if has_path:
#                     CCG.add_edge(v, u, 'DDG')
#     return


def java_control_dependence_graph(root_node, CCG, src_lines, parent):
    node_id = len(CCG.nodes)
    #print(root_node.type)
    if root_node.type in[ 'import_declaration','package_declaration']:
        start_row = root_node.start_point[0]
        end_row = root_node.end_point[0]

        if parent is None:
            CCG.add_node(node_id, nodeType=root_node.type,
                         startRow=start_row, endRow=end_row,
                         sourceLines=src_lines[start_row:end_row + 1],
                         defSet=set(),
                         useSet=set())
            parent = node_id
        else:
            if CCG.nodes[parent]['startRow'] <= start_row and CCG.nodes[parent]['endRow'] >= end_row:
                pass
            else:
                CCG.add_node(node_id, nodeType=root_node.type,
                             startRow=start_row, endRow=end_row,
                             sourceLines=src_lines[start_row:end_row + 1],
                             defSet=set(),
                             useSet=set())
                CCG.add_edge(parent, node_id, 'CDG')
                parent = node_id
    elif root_node.type in ['constructor_declaration','class_declaration', 'method_declaration', 'enum_declaration', 'interface_declaration','module_declaration']:
        if root_node.type in[ 'method_declaration','constructor_declaration']:
            start_row = root_node.start_point[0]
            end_row = root_node.child_by_field_name('parameters').end_point[0]
        elif root_node.type in ['class_declaration', 'enum_declaration', 'interface_declaration','module_declaration']:
            start_row = root_node.start_point[0]
            end_row = root_node.child_by_field_name('body').start_point[0]
        if parent is None:
            CCG.add_node(node_id, nodeType=root_node.type,
                         startRow=start_row, endRow=end_row,
                         sourceLines=src_lines[start_row:end_row + 1],
                         defSet=set(),
                         useSet=set())
            parent = node_id
        else:
            if CCG.nodes[parent]['startRow'] <= start_row and CCG.nodes[parent]['endRow'] >= end_row:
                pass
            else:
                CCG.add_node(node_id, nodeType=root_node.type,
                             startRow=start_row, endRow=end_row,
                             sourceLines=src_lines[start_row:end_row + 1],
                             defSet=set(),
                             useSet=set())
                CCG.add_edge(parent, node_id, 'CDG')
                parent = node_id

    elif root_node.type == 'record_declaration':
        start_row = root_node.start_point[0]
        end_row = root_node.end_point[0]
        if parent is None:
            CCG.add_node(node_id, nodeType=root_node.type,
                         startRow=start_row, endRow=end_row,
                         sourceLines=src_lines[start_row:end_row + 1],
                         defSet=set(), useSet=set())
            parent = node_id
        else:
            if CCG.nodes[parent]['startRow'] <= start_row and CCG.nodes[parent]['endRow'] >= end_row:
                pass
            else:
                CCG.add_node(node_id, nodeType=root_node.type,
                             startRow=start_row, endRow=end_row,
                             sourceLines=src_lines[start_row:end_row + 1],
                             defSet=set(), useSet=set())
                CCG.add_edge(parent, node_id, 'CDG')
                parent = node_id

    elif root_node.type =='switch_expression':
        start_row = root_node.start_point[0]
        if root_node.child_by_field_name('body'):
            end_row = root_node.child_by_field_name('body').end_point[0]
        elif root_node.child_by_field_name('condition'):
            end_row = root_node.child_by_field_name('condition').end_point[0]
        else:
            end_row = root_node.end_point[0]

        if parent is None:
            CCG.add_node(node_id, nodeType=root_node.type,
                         startRow=start_row, endRow=end_row,
                         sourceLines=src_lines[start_row:end_row + 1],
                         defSet=set(), useSet=set())
            parent = node_id
        else:
            if CCG.nodes[parent]['startRow'] <= start_row and CCG.nodes[parent]['endRow'] >= end_row:
                pass
            else:
                CCG.add_node(node_id, nodeType=root_node.type,
                             startRow=start_row, endRow=end_row,
                             sourceLines=src_lines[start_row:end_row + 1],
                             defSet=set(), useSet=set())
                CCG.add_edge(parent, node_id, 'CDG')
                parent = node_id
    elif root_node.type == 'try_statement':
        start_row = root_node.start_point[0]
        end_row = root_node.end_point[0]
        if parent is None:
            CCG.add_node(node_id, nodeType=root_node.type,
                         startRow=start_row, endRow=end_row,
                         sourceLines=src_lines[start_row:end_row + 1],
                         defSet=set(), useSet=set())
            parent = node_id
        else:
            if CCG.nodes[parent]['startRow'] <= start_row and CCG.nodes[parent]['endRow'] >= end_row:
                pass
            else:
                CCG.add_node(node_id, nodeType=root_node.type,
                             startRow=start_row, endRow=end_row,
                             sourceLines=src_lines[start_row:end_row + 1],
                             defSet=set(), useSet=set())
                CCG.add_edge(parent, node_id, 'CDG')
                parent = node_id

    elif root_node.type == 'try_with_resources_statement':
        start_row = root_node.start_point[0]
        resources_child = root_node.child_by_field_name('resources')
        end_row = resources_child.end_point[0] if resources_child else root_node.end_point[0]
        if parent is None:
            CCG.add_node(node_id, nodeType=root_node.type,
                         startRow=start_row, endRow=end_row,
                         sourceLines=src_lines[start_row:end_row + 1],
                         defSet=set(), useSet=set())
            parent = node_id
        else:
            if CCG.nodes[parent]['startRow'] <= start_row and CCG.nodes[parent]['endRow'] >= end_row:
                pass
            else:
                CCG.add_node(node_id, nodeType=root_node.type,
                             startRow=start_row, endRow=end_row,
                             sourceLines=src_lines[start_row:end_row + 1],
                             defSet=set(), useSet=set())
                CCG.add_edge(parent, node_id, 'CDG')
                parent = node_id

    elif root_node.type in ['while_statement', 'for_statement','do_statement']:
        if root_node.type == 'for_statement':
            start_row = root_node.start_point[0]
            if root_node.child_by_field_name('condition'):
                end_row = root_node.child_by_field_name('condition').end_point[0]
            else:
                end_row = root_node.start_point[0]


        elif root_node.type in ['while_statement','do_statement']:
            start_row = root_node.start_point[0]
            if root_node.child_by_field_name('condition'):
                condition_child = root_node.child_by_field_name('condition')
                end_row = condition_child.end_point[0]
            else:
                end_row = root_node.end_point[0]

        if parent is None:
            CCG.add_node(node_id, nodeType=root_node.type,
                         startRow=start_row, endRow=end_row,
                         sourceLines=src_lines[start_row:end_row + 1],
                         defSet=set(),
                         useSet=set())
            parent = node_id
        else:
            if CCG.nodes[parent]['startRow'] <= start_row and CCG.nodes[parent]['endRow'] >= end_row:
                pass
            else:
                CCG.add_node(node_id, nodeType=root_node.type,
                             startRow=start_row, endRow=end_row,
                             sourceLines=src_lines[start_row:end_row + 1],
                             defSet=set(),
                             useSet=set())
                CCG.add_edge(parent, node_id, 'CDG')
                parent = node_id
    elif root_node.type == 'if_statement':
        start_row = root_node.start_point[0]
        end_row = root_node.child_by_field_name('condition').end_point[0]
        if parent is None:
            CCG.add_node(node_id, nodeType=root_node.type,
                         startRow=start_row, endRow=end_row,
                         sourceLines=src_lines[start_row:end_row + 1],
                         defSet=set(),
                         useSet=set())
            parent = node_id
        else:
            if CCG.nodes[parent]['startRow'] <= start_row and CCG.nodes[parent]['endRow'] >= end_row:
                pass
            else:
                CCG.add_node(node_id, nodeType=root_node.type,
                             startRow=start_row, endRow=end_row,
                             sourceLines=src_lines[start_row:end_row + 1],
                             defSet=set(),
                             useSet=set())
                CCG.add_edge(parent, node_id, 'CDG')
                parent = node_id

    elif root_node.type == 'switch_label':
        start_row = root_node.start_point[0]
        pattern_child = root_node.child_by_field_name('pattern')
        expr_child = root_node.child_by_field_name('expression')
        if pattern_child:
            end_row = pattern_child.end_point[0]
        elif expr_child:
            end_row = expr_child.end_point[0]
        else:
            end_row = root_node.end_point[0]

        if parent is None:
            CCG.add_node(node_id, nodeType=root_node.type,
                         startRow=start_row, endRow=end_row,
                         sourceLines=src_lines[start_row:end_row + 1],
                         defSet=set(),
                         useSet=set())
            parent = node_id
        else:
            if CCG.nodes[parent]['startRow'] <= start_row and CCG.nodes[parent]['endRow'] >= end_row:
                pass
            else:
                CCG.add_node(node_id, nodeType=root_node.type,
                             startRow=start_row, endRow=end_row,
                             sourceLines=src_lines[start_row:end_row + 1],
                             defSet=set(),
                             useSet=set())
                CCG.add_edge(parent, node_id, 'CDG')
                parent = node_id

    #除了else语句 except 这里好像没有
    elif root_node.type in ['else', 'except_clause',  'finally_clause','resource_specification']:
        start_row = root_node.start_point[0]
        end_row = root_node.start_point[0]
        if parent is None:
            CCG.add_node(node_id, nodeType=root_node.type,
                         startRow=start_row, endRow=end_row,
                         sourceLines=src_lines[start_row:end_row + 1],
                         defSet=set(),
                         useSet=set())
            parent = node_id
        else:
            if CCG.nodes[parent]['startRow'] <= start_row and CCG.nodes[parent]['endRow'] >= end_row:
                pass
            else:
                CCG.add_node(node_id, nodeType=root_node.type,
                             startRow=start_row, endRow=end_row,
                             sourceLines=src_lines[start_row:end_row + 1],
                             defSet=set(),
                             useSet=set())
                CCG.add_edge(parent, node_id, 'CDG')
                parent = node_id
    elif root_node.type == 'catch_clause':
        start_row = root_node.start_point[0]
        end_row = root_node.child_by_field_name('body').start_point[0]
        if parent is None:
            CCG.add_node(node_id, nodeType=root_node.type,
                         startRow=start_row, endRow=end_row,
                         sourceLines=src_lines[start_row:end_row + 1],
                         defSet=set(),
                         useSet=set())
            parent = node_id
        else:
            if CCG.nodes[parent]['startRow'] <= start_row and CCG.nodes[parent]['endRow'] >= end_row:
                pass
            else:
                CCG.add_node(node_id, nodeType=root_node.type,
                             startRow=start_row, endRow=end_row,
                             sourceLines=src_lines[start_row:end_row + 1],
                             defSet=set(),
                             useSet=set())
                CCG.add_edge(parent, node_id, 'CDG')
                parent = node_id
    elif root_node.type == 'object_creation_expression':
        start_row = root_node.start_point[0]
        if root_node.child_by_field_name('arguments'):
            end_row = root_node.child_by_field_name('arguments').end_point[0]
        elif root_node.child_by_field_name('type'):
            end_row = root_node.child_by_field_name('arguments').end_point[0]
        else:
            end_row = root_node.start_point[0]

        if parent is None:
            CCG.add_node(node_id, nodeType=root_node.type,
                         startRow=start_row, endRow=end_row,
                         sourceLines=src_lines[start_row:end_row + 1],
                         defSet=set(),
                         useSet=set())
            parent = node_id
        else:
            if CCG.nodes[parent]['startRow'] <= start_row and CCG.nodes[parent]['endRow'] >= end_row:
                pass
            else:
                CCG.add_node(node_id, nodeType=root_node.type,
                             startRow=start_row, endRow=end_row,
                             sourceLines=src_lines[start_row:end_row + 1],
                             defSet=set(),
                             useSet=set())
                CCG.add_edge(parent, node_id, 'CDG')
                parent = node_id

    elif root_node.type == 'local_variable_declaration':
        start_row = root_node.start_point[0]
        end_row = root_node.child_by_field_name('declarator').end_point[0]
        if parent is None:
            CCG.add_node(node_id, nodeType=root_node.type,
                         startRow=start_row, endRow=end_row,
                         sourceLines=src_lines[start_row:end_row + 1],
                         defSet=set(),
                         useSet=set())
            parent = node_id
        else:
            if CCG.nodes[parent]['startRow'] <= start_row and CCG.nodes[parent]['endRow'] >= end_row:
                pass
            else:
                CCG.add_node(node_id, nodeType=root_node.type,
                             startRow=start_row, endRow=end_row,
                             sourceLines=src_lines[start_row:end_row + 1],
                             defSet=set(),
                             useSet=set())
                CCG.add_edge(parent, node_id, 'CDG')
                parent = node_id

    elif root_node.type == 'lambda_expression':
        start_row = root_node.start_point[0]
        parameters = root_node.child_by_field_name('parameters')
        end_row = parameters.end_point[0] if parameters else root_node.end_point[0]
        if parent is None:
            CCG.add_node(node_id, nodeType=root_node.type,
                         startRow=start_row, endRow=end_row,
                         sourceLines=src_lines[start_row:end_row + 1],
                         defSet=set(),
                         useSet=set())
            parent = node_id
        else:
            if CCG.nodes[parent]['startRow'] <= start_row and CCG.nodes[parent]['endRow'] >= end_row:
                pass
            else:
                CCG.add_node(node_id, nodeType=root_node.type,
                             startRow=start_row, endRow=end_row,
                             sourceLines=src_lines[start_row:end_row + 1],
                             defSet=set(),
                             useSet=set())
                CCG.add_edge(parent, node_id, 'CDG')
                parent = node_id

        # if parameters:
        #     for param in parameters.children:
        #         if param.type == 'identifier':
        #             CCG.nodes[node_id]['defSet'].add(param.text.decode())

    elif root_node.type == 'synchronized_statement':
        start_row = root_node.start_point[0]
        if root_node.child_by_field_name('block'):
            end_row = root_node.child_by_field_name('block').start_point[0]
        else:
            end_row = root_node.end_point[0]
        if parent is None:
            CCG.add_node(node_id, nodeType=root_node.type,
                         startRow=start_row, endRow=end_row,
                         sourceLines=src_lines[start_row:end_row + 1],
                         defSet=set(),
                         useSet=set())
            parent = node_id
        else:
            if CCG.nodes[parent]['startRow'] <= start_row and CCG.nodes[parent]['endRow'] >= end_row:
                pass
            else:
                CCG.add_node(node_id, nodeType=root_node.type,
                             startRow=start_row, endRow=end_row,
                             sourceLines=src_lines[start_row:end_row + 1],
                             defSet=set(),
                             useSet=set())
                CCG.add_edge(parent, node_id, 'CDG')
                parent = node_id

    elif 'statement' in root_node.type or 'ERROR' in root_node.type:
        start_row = root_node.start_point[0]
        end_row = root_node.end_point[0]
        if parent is None:
            CCG.add_node(node_id, nodeType=root_node.type,
                         startRow=start_row, endRow=end_row,
                         sourceLines=src_lines[start_row:end_row + 1],
                         defSet=set(),
                         useSet=set())
            parent = node_id
        else:
            if CCG.nodes[parent]['startRow'] <= start_row and CCG.nodes[parent]['endRow'] >= end_row:
                pass
            else:
                CCG.add_node(node_id, nodeType=root_node.type,
                             startRow=start_row, endRow=end_row,
                             sourceLines=src_lines[start_row:end_row + 1],
                             defSet=set(),
                             useSet=set())
                CCG.add_edge(parent, node_id, 'CDG')
                parent = node_id


    for child in root_node.children:
        if child.type == 'identifier':
            row = child.start_point[0]
            col_start = child.start_point[1]
            col_end = child.end_point[1]
            identifier_name = src_lines[row][col_start:col_end].strip()
            if parent is None:
                continue
            if 'definition' in CCG.nodes[parent]['nodeType']:
                CCG.nodes[parent]['defSet'].add(identifier_name)
            elif CCG.nodes[parent]['nodeType'] == 'for_statement':
                p = child
                while p.parent.type != 'for_statement':
                    p = p.parent
                if p.parent.type == 'for_statement' and p.prev_sibling.type == 'for':
                    CCG.nodes[parent]['defSet'].add(identifier_name)
                else:
                    CCG.nodes[parent]['useSet'].add(identifier_name)
            elif CCG.nodes[parent]['nodeType'] in ['assignment_expression', 'local_variable_declaration']:
                if child.next_sibling is not None:
                    CCG.nodes[parent]['defSet'].add(identifier_name)
                else:
                    CCG.nodes[parent]['useSet'].add(identifier_name)

            # elif CCG.nodes[parent]['nodeType'] == 'lambda_expression':
            #     CCG.nodes[node_id]['defSet'].update(
            #     [param.text for param in parameters.children if param.type == 'identifier'])

            elif 'import' in CCG.nodes[parent]['nodeType']:
                CCG.nodes[parent]['defSet'].add(identifier_name)
            elif CCG.nodes[parent]['nodeType'] == 'method_declaration':
                # Handle method parameters and local variables
                if child.parent.type == 'formal_parameter':
                    CCG.nodes[parent]['defSet'].add(identifier_name)
                else:
                    CCG.nodes[parent]['useSet'].add(identifier_name)
            elif CCG.nodes[parent]['nodeType'] == 'class_declaration':
                # Handle class fields
                if child.parent.type == 'field_declaration':
                    CCG.nodes[parent]['defSet'].add(identifier_name)
                else:
                    CCG.nodes[parent]['useSet'].add(identifier_name)
            elif CCG.nodes[parent]['nodeType'] == 'catch_clause':
                # Handle exception variables
                if child.parent.type == 'catch_formal_parameter':
                    CCG.nodes[parent]['defSet'].add(identifier_name)
                else:
                    CCG.nodes[parent]['useSet'].add(identifier_name)
            elif CCG.nodes[parent]['nodeType'] == 'enhanced_for_statement':
                # Handle enhanced for loop variables
                if child.parent.type == 'enhanced_for_statement' and child.prev_sibling is not None and child.prev_sibling.type == 'variable_declarator_id':
                    CCG.nodes[parent]['defSet'].add(identifier_name)
                else:
                    CCG.nodes[parent]['useSet'].add(identifier_name)
            elif CCG.nodes[parent]['nodeType'] == 'try_statement':
                # Handle try-with-resources variables
                if child.parent.type == 'resource_specification':
                    CCG.nodes[parent]['defSet'].add(identifier_name)
                else:
                    CCG.nodes[parent]['useSet'].add(identifier_name)
            elif CCG.nodes[parent]['nodeType'] == 'enum_declaration':
                # Handle enum constants
                if child.parent.type == 'enum_constant':
                    CCG.nodes[parent]['defSet'].add(identifier_name)
                else:
                    CCG.nodes[parent]['useSet'].add(identifier_name)

            else:
                CCG.nodes[parent]['useSet'].add(identifier_name)

        java_control_dependence_graph(child, CCG, src_lines, parent)

    return


def java_control_flow_graph(CCG):
    """Derive control-flow edges from the control-dependence tree in ``CCG``.

    The function reads ``CCG`` only (it is not modified). It relies on two
    structural properties of the graph it is given:

    * the first predecessor of a node is its control-dependence parent and
      its successors are its children;
    * node ids sort in creation order (the builder assigns
      ``len(CCG.nodes)`` as the id), so sorting siblings by id yields their
      source order.

    Args:
        CCG: ``networkx.MultiDiGraph`` whose nodes carry a ``'nodeType'``
            attribute.

    Returns:
        A ``(CFG, edge_list)`` tuple. ``CFG`` is a new
        ``networkx.MultiDiGraph`` containing every node of ``CCG`` plus the
        derived edges. ``edge_list`` holds ``(source, target, 'CFG')``
        tuples, each distinct edge exactly once, in the order it was first
        derived. An empty ``CCG`` yields an empty graph and an empty list.
    """
    # Java loop constructs; 'enhanced_for_statement' is the for-each loop.
    loop_types = ('for_statement', 'enhanced_for_statement',
                  'while_statement', 'do_statement')

    def node_type(node):
        return CCG.nodes[node]['nodeType']

    def parent_of(node):
        # First predecessor is the control-dependence parent; None for roots.
        preds = list(CCG.predecessors(node))
        return preds[0] if preds else None

    def enclosing_loop(node):
        # Nearest loop ancestor of `node`, or None when there is none.
        cursor = parent_of(node)
        while cursor is not None:
            if node_type(cursor) in loop_types:
                return cursor
            cursor = parent_of(cursor)
        return None

    CFG = nx.MultiDiGraph()
    next_sibling = {}
    first_children = {}

    # Top-level nodes are chained in id order; the last one has no successor.
    start_nodes = sorted(v for v in CCG.nodes if parent_of(v) is None)
    for v, u in zip(start_nodes, start_nodes[1:]):
        next_sibling[v] = u
    if start_nodes:
        # Guarded: an empty CCG has no last start node to terminate.
        next_sibling[start_nodes[-1]] = None

    for v in CCG.nodes:
        children = sorted(CCG.neighbors(v))
        first_children[v] = children[0] if children else None
        if not children:
            continue
        parent_is_if = node_type(v) == 'if_statement'
        for u, w in zip(children, children[1:]):
            # A statement directly followed by a clause of the same `if`
            # ends its branch and must not fall through into the clause.
            if parent_is_if and 'clause' in node_type(w):
                next_sibling[u] = None
            else:
                next_sibling[u] = w
        next_sibling[children[-1]] = None

    edge_list = []
    emitted = set()

    def add_edge(source, target):
        # Record each CFG edge once; repeated tuples would collapse onto the
        # same keyed edge in the MultiDiGraph anyway.
        edge = (source, target, 'CFG')
        if edge not in emitted:
            emitted.add(edge)
            edge_list.append(edge)

    for v in CCG.nodes:
        v_type = node_type(v)

        # Block entry: every node flows into its first child.
        if first_children[v] is not None:
            add_edge(v, first_children[v])

        if v_type in ('break_statement', 'continue_statement'):
            if parent_of(v) is None:
                # A top-level break/continue gets no outgoing edge at all
                # (behaviour preserved from the previous implementation).
                continue
            loop = enclosing_loop(v)
            # Without an enclosing loop (e.g. `break` inside a switch) there
            # is no jump target, so no jump edge is emitted.
            if loop is not None:
                if v_type == 'break_statement':
                    if next_sibling[loop] is not None:
                        add_edge(v, next_sibling[loop])
                else:
                    add_edge(v, loop)
        elif v_type == 'if_statement':
            # The condition may branch directly to any of its clauses.
            for u in CCG.neighbors(v):
                if 'clause' in node_type(u):
                    add_edge(v, u)
        elif v_type == 'try_statement':
            for u in CCG.neighbors(v):
                if 'catch' in node_type(u) or 'finally' in node_type(u):
                    add_edge(v, u)
        elif v_type == 'switch_expression':
            for u in CCG.neighbors(v):
                if 'switch_label' in node_type(u):
                    add_edge(v, u)
        # NOTE(review): 'return_statement' nodes are not special-cased and
        # still receive the fall-through edge below, as before - confirm
        # that this is intended.

        # Sequential flow to the next sibling, when there is one.
        u = next_sibling[v]
        if u is not None:
            add_edge(v, u)
            continue

        # No next sibling: climb the ancestors until a loop (back edge) or
        # an if/try that has a following sibling (join point) is found.
        ancestor = parent_of(v)
        while ancestor is not None:
            ancestor_type = node_type(ancestor)
            if ancestor_type in loop_types:
                add_edge(v, ancestor)
                break
            if (ancestor_type in ('try_statement', 'if_statement')
                    and next_sibling[ancestor] is not None):
                add_edge(v, next_sibling[ancestor])
                break
            ancestor = parent_of(ancestor)

    CFG.add_edges_from(edge_list)
    # Make sure isolated nodes are present in the CFG as well.
    for v in CCG.nodes:
        if v not in CFG.nodes:
            CFG.add_node(v)
    return CFG, edge_list


def java_data_dependence_graph(CFG, CCG):
    """Add data-dependence edges (key ``'DDG'``) to ``CCG`` in place.

    An edge ``v -> u`` is added when all of the following hold:

    * ``v`` is not an import node and ``v != u``;
    * if the type of ``v`` contains ``'declaration'``, ``v`` is an ancestor
      of ``u`` in ``CCG``;
    * some variable is in both ``defSet`` of ``v`` and ``useSet`` of ``u``;
    * ``CFG`` has a path from ``v`` to ``u`` and, for at least one such
      variable, at least one *shortest* path does not pass through an
      intermediate node that redefines the variable.

    Args:
        CFG: control-flow graph containing every node of ``CCG``.
        CCG: ``networkx.MultiDiGraph`` whose nodes carry ``'nodeType'``,
            ``'defSet'`` and ``'useSet'`` attributes; mutated in place.

    Returns:
        None.
    """
    node_types = {node: CCG.nodes[node]['nodeType'] for node in CCG.nodes}

    # Snapshot the declaration ancestors of every node *before* any DDG edge
    # is added. The walk follows the first predecessor; once DDG edges point
    # at a root node, the same walk on the live graph would follow them.
    declaring_ancestors = {}
    for node in CCG.nodes:
        found = set()
        visited = {node}
        cursor = node
        while True:
            preds = list(CCG.predecessors(cursor))
            if not preds:
                break
            cursor = preds[0]
            if cursor in visited:
                # Defensive: never spin on a cyclic predecessor chain.
                break
            visited.add(cursor)
            if 'declaration' in node_types[cursor]:
                found.add(cursor)
        declaring_ancestors[node] = found

    def survives(var, path):
        # True when no intermediate node on `path` redefines `var`.
        return all(var not in CCG.nodes[p]['defSet'] for p in path[1:-1])

    for v in CCG.nodes:
        v_type = node_types[v]
        if 'import' in v_type:
            continue
        v_defs = CCG.nodes[v]['defSet']
        if not v_defs:
            # Nothing defined here, so nothing can depend on this node.
            continue
        # NOTE(review): this substring test also matches
        # 'local_variable_declaration', so such nodes can only reach their
        # own descendants - confirm that this is intended.
        v_is_declaration = 'declaration' in v_type

        for u in CCG.nodes:
            if u == v:
                continue
            if v_is_declaration and v not in declaring_ancestors[u]:
                continue
            shared = v_defs & CCG.nodes[u]['useSet']
            if not shared or not nx.has_path(CFG, v, u):
                continue
            paths = list(nx.all_shortest_paths(CFG, source=v, target=u))
            # Each (variable, path) pair is judged independently, so a
            # redefinition on one path cannot hide a clean later path.
            if any(survives(var, path) for var in shared for path in paths):
                CCG.add_edge(v, u, 'DDG')
    return

# Original version (superseded by the definitions above; kept commented out for reference)
# def java_control_dependence_graph(root_node, CCG, src_lines, parent):
#     node_id = len(CCG.nodes)
#
#     if root_node.type in ['import_declaration','package_declaration']:
#         start_row = root_node.start_point[0]
#         end_row = root_node.end_point[0]
#
#         if parent is None:
#             CCG.add_node(node_id, nodeType=root_node.type,
#                          startRow=start_row, endRow=end_row,
#                          sourceLines=src_lines[start_row:end_row + 1],
#                          defSet=set(),
#                          useSet=set())
#             parent = node_id
#         else:
#             if CCG.nodes[parent]['startRow'] <= start_row and CCG.nodes[parent]['endRow'] >= end_row:
#                 pass
#             else:
#                 CCG.add_node(node_id, nodeType=root_node.type,
#                              startRow=start_row, endRow=end_row,
#                              sourceLines=src_lines[start_row:end_row + 1],
#                              defSet=set(),
#                              useSet=set())
#                 CCG.add_edge(parent, node_id, 'CDG')
#                 parent = node_id
#     elif root_node.type == 'record_declaration':
#         start_row = root_node.start_point[0]
#         # TODO: decide whether end_row should be derived from the body or from the parameters
#         end_row = root_node.end_point[0]
#         if parent is None:
#             CCG.add_node(node_id, nodeType=root_node.type,
#                          startRow=start_row, endRow=end_row,
#                          sourceLines=src_lines[start_row:end_row + 1],
#                          defSet=set(), useSet=set())
#             parent = node_id
#         else:
#             if CCG.nodes[parent]['startRow'] <= start_row and CCG.nodes[parent]['endRow'] >= end_row:
#                 pass
#             else:
#                 CCG.add_node(node_id, nodeType=root_node.type,
#                              startRow=start_row, endRow=end_row,
#                              sourceLines=src_lines[start_row:end_row + 1],
#                              defSet=set(), useSet=set())
#                 CCG.add_edge(parent, node_id, 'CDG')
#                 parent = node_id
#
#     elif root_node.type in ['class_declaration', 'method_declaration', 'enum_declaration', 'interface_declaration']:
#         if root_node.type == 'method_declaration':
#             start_row = root_node.start_point[0]
#             end_row = root_node.child_by_field_name('parameters').end_point[0]
#         elif root_node.type in ['class_declaration', 'enum_declaration', 'interface_declaration']:
#             start_row = root_node.start_point[0]
#             end_row = root_node.child_by_field_name('name').end_point[0]
#         if parent is None:
#             CCG.add_node(node_id, nodeType=root_node.type,
#                          startRow=start_row, endRow=end_row,
#                          sourceLines=src_lines[start_row:end_row + 1],
#                          defSet=set(),
#                          useSet=set())
#             parent = node_id
#         else:
#             if CCG.nodes[parent]['startRow'] <= start_row and CCG.nodes[parent]['endRow'] >= end_row:
#                 pass
#             else:
#                 CCG.add_node(node_id, nodeType=root_node.type,
#                              startRow=start_row, endRow=end_row,
#                              sourceLines=src_lines[start_row:end_row + 1],
#                              defSet=set(),
#                              useSet=set())
#                 CCG.add_edge(parent, node_id, 'CDG')
#                 parent = node_id
#     elif root_node.type in ['while_statement', 'for_statement']:
#         if root_node.type == 'for_statement':
#             start_row = root_node.start_point[0]
#             if root_node.child_by_field_name('condition'):
#                 end_row = root_node.child_by_field_name('condition').end_point[0]
#             else:
#                 end_row = root_node.start_point[0]
#         if root_node.type == 'while_statement':
#             start_row = root_node.start_point[0]
#             if root_node.child_by_field_name('condition'):
#                 end_row = root_node.child_by_field_name('condition').end_point[0]
#             else:
#                 end_row = root_node.start_point[0]
#         if parent is None:
#             CCG.add_node(node_id, nodeType=root_node.type,
#                          startRow=start_row, endRow=end_row,
#                          sourceLines=src_lines[start_row:end_row + 1],
#                          defSet=set(),
#                          useSet=set())
#             parent = node_id
#         else:
#             if CCG.nodes[parent]['startRow'] <= start_row and CCG.nodes[parent]['endRow'] >= end_row:
#                 pass
#             else:
#                 CCG.add_node(node_id, nodeType=root_node.type,
#                              startRow=start_row, endRow=end_row,
#                              sourceLines=src_lines[start_row:end_row + 1],
#                              defSet=set(),
#                              useSet=set())
#                 CCG.add_edge(parent, node_id, 'CDG')
#                 parent = node_id
#     elif root_node.type == 'if_statement':
#         start_row = root_node.start_point[0]
#         end_row = root_node.child_by_field_name('condition').end_point[0]
#         if parent is None:
#             CCG.add_node(node_id, nodeType=root_node.type,
#                          startRow=start_row, endRow=end_row,
#                          sourceLines=src_lines[start_row:end_row + 1],
#                          defSet=set(),
#                          useSet=set())
#             parent = node_id
#         else:
#             if CCG.nodes[parent]['startRow'] <= start_row and CCG.nodes[parent]['endRow'] >= end_row:
#                 pass
#             else:
#                 CCG.add_node(node_id, nodeType=root_node.type,
#                              startRow=start_row, endRow=end_row,
#                              sourceLines=src_lines[start_row:end_row + 1],
#                              defSet=set(),
#                              useSet=set())
#                 CCG.add_edge(parent, node_id, 'CDG')
#                 parent = node_id
#     elif root_node.type in ['else', 'except_clause', 'catch_clause', 'finally_clause']:
#         start_row = root_node.start_point[0]
#         end_row = root_node.start_point[0]
#         if parent is None:
#             CCG.add_node(node_id, nodeType=root_node.type,
#                          startRow=start_row, endRow=end_row,
#                          sourceLines=src_lines[start_row:end_row + 1],
#                          defSet=set(),
#                          useSet=set())
#             parent = node_id
#         else:
#             if CCG.nodes[parent]['startRow'] <= start_row and CCG.nodes[parent]['endRow'] >= end_row:
#                 pass
#             else:
#                 CCG.add_node(node_id, nodeType=root_node.type,
#                              startRow=start_row, endRow=end_row,
#                              sourceLines=src_lines[start_row:end_row + 1],
#                              defSet=set(),
#                              useSet=set())
#                 CCG.add_edge(parent, node_id, 'CDG')
#                 parent = node_id
#     elif 'statement' in root_node.type or 'ERROR' in root_node.type:
#         start_row = root_node.start_point[0]
#         end_row = root_node.end_point[0]
#         if parent is None:
#             CCG.add_node(node_id, nodeType=root_node.type,
#                          startRow=start_row, endRow=end_row,
#                          sourceLines=src_lines[start_row:end_row + 1],
#                          defSet=set(),
#                          useSet=set())
#             parent = node_id
#         else:
#             if CCG.nodes[parent]['startRow'] <= start_row and CCG.nodes[parent]['endRow'] >= end_row:
#                 pass
#             else:
#                 CCG.add_node(node_id, nodeType=root_node.type,
#                              startRow=start_row, endRow=end_row,
#                              sourceLines=src_lines[start_row:end_row + 1],
#                              defSet=set(),
#                              useSet=set())
#                 CCG.add_edge(parent, node_id, 'CDG')
#                 parent = node_id
#
#     for child in root_node.children:
#         if child.type == 'identifier':
#             row = child.start_point[0]
#             col_start = child.start_point[1]
#             col_end = child.end_point[1]
#             identifier_name = src_lines[row][col_start:col_end].strip()
#             if parent is None:
#                 continue
#             if 'definition' in CCG.nodes[parent]['nodeType']:
#                 CCG.nodes[parent]['defSet'].add(identifier_name)
#             elif CCG.nodes[parent]['nodeType'] == 'for_statement':
#                 p = child
#                 while p.parent.type != 'for_statement':
#                     p = p.parent
#                 if p.parent.type == 'for_statement' and p.prev_sibling.type == 'for':
#                     CCG.nodes[parent]['defSet'].add(identifier_name)
#                 else:
#                     CCG.nodes[parent]['useSet'].add(identifier_name)
#             elif CCG.nodes[parent]['nodeType'] in ['assignment_expression', 'local_variable_declaration']:
#                 if child.next_sibling is not None:
#                     CCG.nodes[parent]['defSet'].add(identifier_name)
#                 else:
#                     CCG.nodes[parent]['useSet'].add(identifier_name)
#             elif 'import' in CCG.nodes[parent]['nodeType']:
#                 CCG.nodes[parent]['defSet'].add(identifier_name)
#             else:
#                 CCG.nodes[parent]['useSet'].add(identifier_name)
#         java_control_dependence_graph(child, CCG, src_lines, parent)
#
#     return
#
#
# def java_control_flow_graph(CCG):
#     CFG = nx.MultiDiGraph()
#
#     next_sibling = dict()
#     first_children = dict()
#
#     start_nodes = []
#     for v in CCG.nodes:
#         if len(list(CCG.predecessors(v))) == 0:
#             start_nodes.append(v)
#     start_nodes.sort()
#     for i in range(0, len(start_nodes) - 1):
#         v = start_nodes[i]
#         u = start_nodes[i + 1]
#         next_sibling[v] = u
#     next_sibling[start_nodes[-1]] = None
#
#     for v in CCG.nodes:
#         children = list(CCG.neighbors(v))
#         if len(children) != 0:
#             children.sort()
#             for i in range(0, len(children) - 1):
#                 u = children[i]
#                 w = children[i + 1]
#                 if CCG.nodes[v]['nodeType'] == 'if_statement' and 'clause' in CCG.nodes[w]['nodeType']:
#                     next_sibling[u] = None
#                 else:
#                     next_sibling[u] = w
#             next_sibling[children[-1]] = None
#             first_children[v] = children[0]
#         else:
#             first_children[v] = None
#
#     edge_list = []
#
#     for v in CCG.nodes:
#         # block start control flow
#         if v in first_children.keys():
#             u = first_children[v]
#             if u is not None:
#                 edge_list.append((v, u, 'CFG'))
#         # block end control flow
#         if CCG.nodes[v]['nodeType'] == 'return_statement':
#             pass
#         elif CCG.nodes[v]['nodeType'] in ['break_statement', 'continue_statement']:
#             u = None
#             predecessors = list(CCG.predecessors(v))
#             if not predecessors:
#                 continue
#             p = list(CCG.predecessors(v))[0]
#             while CCG.nodes[p]['nodeType'] not in ['for_statement', 'while_statement']:
#                 if list(CCG.predecessors(p)) == []:
#                     break
#                 p = list(CCG.predecessors(p))[0]
#             if CCG.nodes[v]['nodeType'] == 'break_statement':
#                 u = next_sibling[p]
#             if CCG.nodes[v]['nodeType'] == 'continue_statement':
#                 u = p
#             if u is not None:
#                 edge_list.append((v, u, 'CFG'))
#         elif CCG.nodes[v]['nodeType'] == 'for_statement':
#             u = first_children[v]
#             if u is not None:
#                 edge_list.append((v, u, 'CFG'))
#         elif CCG.nodes[v]['nodeType'] == 'while_statement':
#             u = first_children[v]
#             if u is not None:
#                 edge_list.append((v, u, 'CFG'))
#             u = next_sibling[v]
#             if u is not None:
#                 edge_list.append((v, u, 'CFG'))
#         elif CCG.nodes[v]['nodeType'] in ['if_statement' or 'try_statement']:
#             u = first_children[v]
#             if u is not None:
#                 edge_list.append((v, u, 'CFG'))
#             for u in CCG.neighbors(v):
#                 if 'clause' in CCG.nodes[u]['nodeType']:
#                     edge_list.append((v, u, 'CFG'))
#         elif 'clause' in CCG.nodes[v]['nodeType']:
#             u = first_children[v]
#             if u is not None:
#                 edge_list.append((v, u, 'CFG'))
#
#         u = next_sibling[v]
#         if u is None:
#             p = v
#             while len(list(CCG.predecessors(p))) != 0:
#                 p = list(CCG.predecessors(p))[0]
#                 if CCG.nodes[p]['nodeType'] == 'while_statement':
#                         edge_list.append((v, p, 'CFG'))
#                         break
#                 if CCG.nodes[p]['nodeType'] == 'for_statement':
#                     edge_list.append((v, p, 'CFG'))
#                     break
#                 if CCG.nodes[p]['nodeType'] in ['try_statement', 'if_statement']:
#                     if next_sibling[p] is not None:
#                         edge_list.append((v, next_sibling[p], 'CFG'))
#                         break
#         if u is not None:
#             edge_list.append((v, u, 'CFG'))
#     CFG.add_edges_from(edge_list)
#     for v in CCG.nodes:
#         if v not in CFG.nodes:
#             CFG.add_node(v)
#     return CFG, edge_list
#
#
# def java_data_dependence_graph(CFG, CCG):
#     for v in CCG.nodes:
#         for u in CCG.nodes:
#             if v == u or 'import' in CCG.nodes[v]['nodeType']:
#                 continue
#             # find the definition of u
#             u_def = u
#             u_def_set = set()
#             while len(list(CCG.predecessors(u_def))) != 0:
#                 u_def = list(CCG.predecessors(u_def))[0]
#                 if 'declaration' in CCG.nodes[u_def]['nodeType']:
#                     u_def_set.add(u_def)
#             if 'declaration' in CCG.nodes[v]['nodeType'] and v not in u_def_set:
#                 continue
#             if len(CCG.nodes[v]['defSet'] & CCG.nodes[u]['useSet']) != 0 and nx.has_path(CFG, v, u):
#                 has_path = False
#                 paths = list(nx.all_shortest_paths(CFG, source=v, target=u))
#                 variables = CCG.nodes[v]['defSet'] & CCG.nodes[u]['useSet']
#                 for var in variables:
#                     has_def = False
#                     for path in paths:
#                         for p in path[1:-1]:
#                             if var in CCG.nodes[p]['defSet']:
#                                 has_def = True
#                                 break
#                         if not has_def:
#                             has_path = True
#                             break
#                     if has_path:
#                         break
#                 if has_path:
#                     CCG.add_edge(v, u, 'DDG')
#     return
# #
def create_graph(code_lines, repo_name):
    """Build the code context graph (CCG) for a piece of source code.

    The source is parsed with tree-sitter, then control-dependence ('CDG'),
    control-flow ('CFG') and data-dependence ('DDG') information is combined
    into a single ``networkx.MultiDiGraph``. Lines that start with the
    language's line-comment prefix are blanked out before parsing and merged
    back into the row span of a neighbouring node afterwards.

    Args:
        code_lines: Sequence of source strings; they are concatenated with
            ``"".join`` and re-split into lines. The same sequence is sliced
            by row index to fill the ``sourceLines`` node attribute, so it is
            presumably one entry per physical line -- TODO confirm with callers.
        repo_name: Key into ``CONSTANTS.repos_language`` that selects the
            tree-sitter grammar.

    Returns:
        The populated ``networkx.MultiDiGraph``, or ``None`` when the input
        has no lines or the parsed tree contains nothing but comments.
    """
    # Non-ASCII characters are dropped before the text is re-split into lines.
    ascii_text = "".join(code_lines).encode('ascii', errors='ignore').decode('ascii')
    src_lines = ascii_text.splitlines(keepends=True)

    # Nothing to parse: return before the grammar library is built or loaded.
    if not src_lines:
        return None

    # Trim trailing whitespace and any '(' / '[' / ',' at the ends of the last
    # line (presumably a dangling opener left by a truncated snippet).
    src_lines[-1] = src_lines[-1].rstrip().strip('(').strip('[').strip(',')

    # Define tree-sitter parser
    Language.build_library('./my-languages.so', ['./tree-sitter-python', './tree-sitter-java'])
    language = Language('./my-languages.so', CONSTANTS.repos_language[repo_name])
    parser = Parser()
    parser.set_language(language)

    # Blank out whole-line comments, remembering their row indices so they can
    # be folded back into node spans once the graph exists. The empty-string
    # default is kept from the original: it matches every line.
    comment_prefix = {'python': '#', 'java': '//'}.get(language.name, "")
    comment_lines = []
    for row, line in enumerate(src_lines):
        if line.lstrip().startswith(comment_prefix):
            src_lines[row] = '\n'
            comment_lines.append(row)

    # Feed the parser from the (comment-stripped) line list.
    def read_callable(byte_offset, point):
        row, column = point
        if row >= len(src_lines) or column >= len(src_lines[row]):
            return None
        return src_lines[row][column:].encode('utf8', errors='ignore')

    tree = parser.parse(read_callable)

    # A tree with no top-level children, or only comment children, yields no graph.
    if all(child.type == 'comment' for child in tree.root_node.children):
        return None

    # Initialize program dependence graph
    ccg = nx.MultiDiGraph()

    # The builders are looked up inside the selected branch only, so choosing
    # one language never touches the other language's functions.
    # NOTE(review): the java_* flow/data builders appear commented out just
    # above this function -- confirm they are defined elsewhere in the file.
    builders = None
    if language.name == 'python':
        builders = (python_control_dependence_graph,
                    python_control_flow_graph,
                    python_data_dependence_graph)
    elif language.name == "java":
        builders = (java_control_dependence_graph,
                    java_control_flow_graph,
                    java_data_dependence_graph)

    if builders is not None:
        build_cdg, build_cfg, build_ddg = builders

        # Construct control dependence edges. The original code_lines (not the
        # comment-stripped src_lines) are handed to the builder.
        for child in tree.root_node.children:
            build_cdg(child, ccg, code_lines, None)

        # Construct control flow graph
        cfg, cfg_edge_list = build_cfg(ccg)

        # Construct data dependence graph, then merge the flow edges in.
        build_ddg(cfg, ccg)
        ccg.add_edges_from(cfg_edge_list)

    # Re-attach comment rows, walking from the last comment to the first.
    node_list = sorted(ccg.nodes)
    max_comment_line = 0
    for comment_line_num in reversed(comment_lines):
        # First node (in graph iteration order) that starts below this comment.
        insert_id = next(
            (v for v in ccg.nodes if ccg.nodes[v]['startRow'] > comment_line_num),
            None,
        )
        if insert_id is None:
            # No node follows this comment; remember the furthest such row.
            max_comment_line = max(max_comment_line, comment_line_num)
        else:
            # Extend the node upwards so it starts at the comment row.
            ccg.nodes[insert_id]['startRow'] = comment_line_num
            end_row = ccg.nodes[insert_id]['endRow']
            ccg.nodes[insert_id]['sourceLines'] = code_lines[comment_line_num: end_row + 1]

    # Trailing comments are appended to the node with the highest id; skipped
    # when the graph has no nodes, which previously raised IndexError.
    if max_comment_line != 0 and node_list:
        last_node_id = node_list[-1]
        ccg.nodes[last_node_id]['endRow'] = max_comment_line
        start_row = ccg.nodes[last_node_id]['startRow']
        ccg.nodes[last_node_id]['sourceLines'] = code_lines[start_row: max_comment_line + 1]
    return ccg
