import random
import json
from pathlib import Path

# Sample relation list in the "<left> <op> <right>" format that
# convert_relations() parses. It is not referenced by any function visible in
# this file (convert_relations() takes its own `relations` argument); it is
# kept for manual testing.
relations = ['H < P', 'I > L', 'A < P', 'R < L', 'Y < C', 'V < Y', 'L > Y']

# Pool of obscure/technical-sounding package names.
# convert_relations() draws one distinct name per symbol from this list with
# random.sample(), so the number of entries here is the upper bound on how
# many symbols a single call can map.
package_name_pool = [
    "xnetcore", "alphaparse", "vaultsync", "zlibwrap", "nodeinjector",
    "quantarray", "cryptobind", "streamx", "protoflux", "datasieve",
    "jetcache", "indexvisor", "loadsnap", "fluxmarshal", "yokobase",
    "muxdelta", "hushproxy", "cipherlint", "bitdrain", "corephage",
    "jsonspill", "parasync", "vergeproc", "shardglue", "tokentrap",
    "syncrift", "depthlink", "ghostbind", "corestitch", "refcast",
    "packlane", "shadevault", "pipestream", "bindscope", "memdraft",
    "optivault", "wraptide", "stacktune", "modlinker", "nebulock",
    "forktrail", "lagvizor", "tickroll", "hookmesh", "diskshade",
    "nodemerge", "fuzzcrunch", "obfusgrid", "threadmint", "streamshade",
    "graphrust", "queueforge", "rangeseal", "indexmorph", "netsealer",
    "blobflux", "loopstitch", "jitweave", "cryptosurge", "fluxrigger",
    "snapveil", "authchute", "codecage", "modprobe", "sockhub",
    "prefx", "cargowrap", "vectrobit", "logshard", "wormtrace",
    "muxcage", "heaptrail", "nexusbloom", "tunnelbit", "structfoil",
    "linkmelt", "tracestream", "boundcrate", "bufferpike", "gridsnare"
]

# Dependency phrases for a "left < right" relation.
# convert_relations() fills the first placeholder with the left operand's
# package name and the second with the right operand's, so every template
# here states that the left package comes AFTER the right one.
# NOTE(review): this renders "A < B" as "A is loaded after B" -- confirm that
# this direction matches the ordering convention of the source dataset.
less_than_phrases = [
    "{} must be loaded after {}",
    "{} depends on {}",
    "{} initializes after {}",
    "{} requires {} to be available first",
    "{} needs {} to be ready first",
    "{} starts only after {}",
    "{} comes later than {}",
    "{} executes after {}"
]

# Dependency phrases for a "left > right" relation.
# Placeholders are filled as (left operand, right operand), so every template
# here states that the left package comes BEFORE the right one -- the mirror
# image of less_than_phrases.
greater_than_phrases = [
    "{} must be loaded before {}",
    "{} is a prerequisite for {}",
    "{} initializes before {}",
    "{} is needed by {}",
    "{} must be ready before {} starts",
    "{} runs before {} can execute",
    "{} comes earlier than {}",
    "{} executes before {}"
]

def convert_relations(relations, nodes_label=None):
    """Convert symbolic relations to natural language descriptions.

    Each relation is a string of the form ``"<left> <op> <right>"`` (tokens
    separated by whitespace) where ``op`` is ``<`` or ``>``.  Every symbol is
    assigned a distinct random name from ``package_name_pool`` and each
    relation is rendered with a randomly chosen template from
    ``less_than_phrases`` (``<``) or ``greater_than_phrases`` (``>``).

    Args:
        relations: Iterable of relation strings.
        nodes_label: Optional collection of node symbols.  When non-empty it
            alone defines which symbols receive a name (so that nodes which
            only appear elsewhere, e.g. in cycles, are mapped too).  When
            empty or ``None`` the symbols are taken from ``relations``.

    Returns:
        A ``(phrases, name_map)`` tuple: ``phrases`` is the list of rendered
        sentences in the order of ``relations``; ``name_map`` maps each
        symbol to its assigned package name.

    Raises:
        ValueError: If a relation does not consist of exactly three tokens,
            uses an operator other than ``<``/``>``, or there are more
            symbols than names in ``package_name_pool``.
        KeyError: If a relation references a symbol that is missing from the
            mapping (possible only when ``nodes_label`` is given).
    """
    # Parse every relation exactly once and fail loudly on malformed input,
    # instead of skipping it here and crashing later with an opaque
    # "not enough values to unpack" error.
    parsed = []
    for rel in relations:
        parts = rel.split()
        if len(parts) != 3:
            raise ValueError(
                f"Malformed relation (expected 'left op right'): {rel!r}"
            )
        parsed.append((rel, *parts))

    if nodes_label:
        # Use all nodes from the label to ensure we have mappings for cycles
        all_symbols = set(nodes_label)
    else:
        all_symbols = {
            symbol
            for _, left, _, right in parsed
            for symbol in (left, right)
        }

    # Sorting makes the symbol -> name pairing depend only on the random
    # state, not on set iteration order.
    symbols = sorted(all_symbols)
    if len(symbols) > len(package_name_pool):
        raise ValueError(
            f"Need {len(symbols)} distinct package names but the pool only "
            f"has {len(package_name_pool)}"
        )
    name_map = dict(zip(symbols, random.sample(package_name_pool, len(symbols))))

    templates_by_op = {"<": less_than_phrases, ">": greater_than_phrases}

    output = []
    for rel, left, op, right in parsed:
        # Validate that nodes exist in mapping
        for node in (left, right):
            if node not in name_map:
                raise KeyError(
                    f"Node '{node}' from relation '{rel}' not found in node mapping"
                )

        templates = templates_by_op.get(op)
        if templates is None:
            raise ValueError(f"Unknown operator in relation: {rel}")

        output.append(
            random.choice(templates).format(name_map[left], name_map[right])
        )

    return output, name_map

def convert_cycles(cycles, name_map):
    """Translate every node of every cycle through ``name_map``.

    Args:
        cycles: Iterable of cycles, each an iterable of node symbols.
        name_map: Mapping from node symbol to package name.

    Returns:
        A list of lists with the same shape as ``cycles`` in which each
        symbol has been replaced by its mapped name.

    Raises:
        KeyError: If a node has no entry in ``name_map``.
    """
    def _translate(node):
        # Look up a single node, reporting unknown symbols explicitly.
        if node in name_map:
            return name_map[node]
        raise KeyError(f"Node '{node}' from cycle not found in node mapping")

    return [[_translate(node) for node in cycle] for cycle in cycles]

def convert_comparison_test(comparison_test, name_map):
    """Convert a comparison test using an existing node mapping.

    Args:
        comparison_test: String of the form ``"<left> <op> <right>"``
            (whitespace separated), or a falsy value (``""``/``None``).
        name_map: Mapping from node symbol to package name.

    Returns:
        ``"<left_name> <op> <right_name>"`` with the operator kept as-is.
        A falsy ``comparison_test`` is returned unchanged.

    Raises:
        ValueError: If ``comparison_test`` does not consist of exactly three
            whitespace-separated tokens.
        KeyError: If either operand has no entry in ``name_map``.
    """
    if not comparison_test:
        return comparison_test

    parts = comparison_test.split()
    if len(parts) != 3:
        # Name the offending input instead of surfacing Python's generic
        # "not enough values to unpack" message.
        raise ValueError(
            f"Malformed comparison test (expected 'left op right'): {comparison_test!r}"
        )
    left, op, right = parts

    # Validate that nodes exist in mapping
    for node in (left, right):
        if node not in name_map:
            raise KeyError(f"Node '{node}' from comparison test not found in node mapping")

    return f"{name_map[left]} {op} {name_map[right]}"

def process_dataset_row(row):
    """Build the natural-language version of one dataset row.

    The row's ``relationships``, ``cycles``, ``nodes_label`` and (optional)
    ``comparison_test`` values are translated with a single shared node
    mapping.  In the returned dict the original values of those columns are
    kept under an ``origin_`` prefix, the translated values take over the
    original column names, every other column is copied unchanged, and the
    mapping itself is stored under ``node_mapping``.

    Args:
        row: Dict with at least ``relationships``, ``cycles`` and
            ``nodes_label`` keys.

    Returns:
        A new dict; ``row`` itself is not modified.

    Raises:
        KeyError: If a required column is missing or a node cannot be mapped.
    """
    # One mapping, seeded from nodes_label, is shared by every conversion below.
    phrases, name_map = convert_relations(row['relationships'], row['nodes_label'])
    translated_cycles = convert_cycles(row['cycles'], name_map)
    translated_test = convert_comparison_test(row.get('comparison_test', ''), name_map)

    def _label_name(node):
        # Translate one node label, reporting unknown symbols explicitly.
        if node in name_map:
            return name_map[node]
        raise KeyError(f"Node '{node}' from nodes_label not found in node mapping")

    translated_labels = [_label_name(node) for node in row['nodes_label']]

    # Columns whose original content moves to an origin_-prefixed key.
    renamed = {'relationships', 'cycles', 'nodes_label', 'comparison_test'}
    result = {
        (f'origin_{key}' if key in renamed else key): value
        for key, value in row.items()
    }

    # Translated content goes in under the original column names.
    result['relationships'] = phrases
    result['cycles'] = translated_cycles
    result['nodes_label'] = translated_labels
    result['comparison_test'] = translated_test
    result['node_mapping'] = name_map
    return result

def process_jsonl_file(input_path, output_path):
    """Process a JSONL file and write the converted rows as JSONL.

    Each non-blank input line is parsed as JSON, converted with
    ``process_dataset_row`` and written as one line of the output file.
    A line that fails to parse or convert is reported on stdout and skipped;
    processing continues with the next line.

    Args:
        input_path: Path (``str`` or ``Path``) of the JSONL file to read.
        output_path: Path (``str`` or ``Path``) of the JSONL file to write.
            Missing parent directories are created; an existing file is
            overwritten.

    Returns:
        The number of rows successfully converted and written.

    Raises:
        FileNotFoundError: If ``input_path`` does not exist.
        ValueError: If ``input_path`` and ``output_path`` resolve to the same
            file.
    """
    input_path = Path(input_path)
    output_path = Path(output_path)

    if not input_path.exists():
        raise FileNotFoundError(f"Input file not found: {input_path}")

    # Opening the output with 'w' truncates it immediately; if both paths
    # name the same file the input would be wiped before any line is read.
    if input_path.resolve() == output_path.resolve():
        raise ValueError(
            f"Input and output paths refer to the same file: {input_path}"
        )

    # Create output directory if it doesn't exist
    output_path.parent.mkdir(parents=True, exist_ok=True)

    processed_count = 0

    with open(input_path, 'r', encoding='utf-8') as infile, \
         open(output_path, 'w', encoding='utf-8') as outfile:

        for line_num, line in enumerate(infile, 1):
            payload = line.strip()
            if not payload:
                # Blank lines (e.g. a trailing newline at end of file) carry
                # no row; skip them instead of reporting a JSON error.
                continue

            try:
                converted_row = process_dataset_row(json.loads(payload))
                outfile.write(json.dumps(converted_row, ensure_ascii=False) + '\n')
            except Exception as e:
                # Deliberate per-line boundary: one bad row must not abort
                # the whole file, so report it and move on.
                print(f"Error processing line {line_num}: {e}")
            else:
                processed_count += 1

    print(f"Successfully processed {processed_count} rows from {input_path} to {output_path}")
    return processed_count

def main():
    """Convert the hard-coded input file and report the outcome.

    Returns:
        ``0`` when ``process_jsonl_file`` completes, ``1`` when it raises
        (the error is printed to stdout).
    """
    # Edit these two paths to point at the desired input and output files.
    input_file = "task2.jsonl"
    output_file = "task2_nl.jsonl"

    try:
        process_jsonl_file(input_file, output_file)
    except Exception as e:
        print(f"Error: {e}")
        exit_code = 1
    else:
        exit_code = 0

    return exit_code

if __name__ == "__main__":
    # Runs only when the file is executed as a script; main()'s return value
    # (0 on success, 1 on failure) becomes the process exit status.
    import sys
    sys.exit(main())
