#!/usr/bin/env python3
import re
from statistics import mean, median

def extract_swap_operations(file_path):
    """Collect SWAP_OUT and SWAP_IN records from a log file, in file order.

    A line is recorded when it contains ``[SWAP_OUT] Total blocks: <n>`` or
    ``[SWAP_IN] Total blocks: <n>``. A line containing both markers yields
    two records, the SWAP_OUT one first.

    Args:
        file_path: Path of the UTF-8 text log to scan.

    Returns:
        A list of dicts with the keys ``line`` (1-based line number),
        ``type`` (``'SWAP_OUT'`` or ``'SWAP_IN'``), ``blocks`` (int) and
        ``timestamp`` (the second whitespace-separated token of the line,
        or ``''`` if the line has fewer than two tokens). If reading fails
        for any reason, an error message is printed and an empty list is
        returned; records gathered before the failure are discarded.
    """
    # Checked in this order on every line, so SWAP_OUT is recorded before
    # SWAP_IN when both appear on the same line.
    matchers = (
        ('SWAP_OUT', re.compile(r'\[SWAP_OUT\] Total blocks: (\d+)')),
        ('SWAP_IN', re.compile(r'\[SWAP_IN\] Total blocks: (\d+)')),
    )
    records = []

    try:
        with open(file_path, 'r', encoding='utf-8') as log_file:
            for line_no, text in enumerate(log_file, 1):
                for kind, pattern in matchers:
                    found = pattern.search(text)
                    if found is None:
                        continue
                    tokens = text.split()
                    records.append({
                        'line': line_no,
                        'type': kind,
                        'blocks': int(found.group(1)),
                        'timestamp': tokens[1] if len(tokens) > 1 else '',
                    })
    except Exception as e:
        print(f"Error reading {file_path}: {e}")
        return []

    return records

def _print_block_stats(label, blocks):
    """Print mean, median, min and max of a non-empty block-count list."""
    print(f"\n{label} Blocks:")
    print(f"  평균: {mean(blocks):.2f}")
    print(f"  중간값: {median(blocks):.2f}")
    print(f"  최소: {min(blocks)}")
    print(f"  최대: {max(blocks)}")


def analyze_swap_patterns(operations):
    """Print a statistical comparison of swap-out and swap-in block counts.

    The report has three parts: per-type statistics (with the mean
    difference and its ratio to the swap-out mean), the first ten
    operations in order, and statistics over every SWAP_OUT record that is
    immediately followed by a SWAP_IN record.

    Args:
        operations: Sequence of dicts with at least the keys ``type``
            (``'SWAP_OUT'`` or ``'SWAP_IN'``), ``blocks`` (int) and
            ``line`` (int), in the order they should be analysed.

    Returns:
        None. All results are written to standard output.
    """
    swap_out_blocks = [op['blocks'] for op in operations if op['type'] == 'SWAP_OUT']
    swap_in_blocks = [op['blocks'] for op in operations if op['type'] == 'SWAP_IN']

    print("=== Swap Out vs Swap In Blocks 비교 ===")
    print(f"Swap Out 샘플 수: {len(swap_out_blocks)}")
    print(f"Swap In 샘플 수: {len(swap_in_blocks)}")

    if swap_out_blocks:
        _print_block_stats("Swap Out", swap_out_blocks)

    if swap_in_blocks:
        _print_block_stats("Swap In", swap_in_blocks)

    # Difference between the two means
    if swap_out_blocks and swap_in_blocks:
        out_mean = mean(swap_out_blocks)
        diff = mean(swap_in_blocks) - out_mean
        print(f"\n평균 차이 (Swap In - Swap Out): {diff:.2f} blocks")
        if out_mean:
            print(f"차이 비율: {(diff/out_mean*100):.2f}%")
        else:
            # Every swap-out moved 0 blocks: the ratio is undefined, so
            # report that instead of raising ZeroDivisionError.
            print("차이 비율: N/A (Swap Out 평균이 0)")

    # Operations in the order they were given
    print(f"\n=== 시간순 작업 패턴 ===")
    print("처음 10개 작업:")
    for idx, op in enumerate(operations[:10], 1):
        print(f"  {idx:2d}. {op['type']}: {op['blocks']:3d} blocks (라인 {op['line']})")

    # Adjacent (SWAP_OUT, SWAP_IN) pairs, stored as (out_blocks, in_blocks)
    consecutive_pairs = [
        (prev['blocks'], nxt['blocks'])
        for prev, nxt in zip(operations, operations[1:])
        if prev['type'] == 'SWAP_OUT' and nxt['type'] == 'SWAP_IN'
    ]

    if consecutive_pairs:
        print(f"\n=== 연속된 Swap Out -> Swap In 쌍 분석 ===")
        print(f"총 쌍의 수: {len(consecutive_pairs)}")

        differences = [in_blocks - out_blocks for out_blocks, in_blocks in consecutive_pairs]
        print(f"Blocks 차이 평균: {mean(differences):.2f}")
        print(f"Blocks 차이 중간값: {median(differences):.2f}")
        print(f"Blocks 차이 최소: {min(differences)}")
        print(f"Blocks 차이 최대: {max(differences)}")

        print(f"\n처음 5개 쌍:")
        for idx, (out_blocks, in_blocks) in enumerate(consecutive_pairs[:5], 1):
            print(f"  {idx}. Swap Out: {out_blocks:3d} -> Swap In: {in_blocks:3d} (차이: {in_blocks - out_blocks:+3d})")

def main(target_file=None):
    """Extract swap operations from a log file and print the analysis.

    Args:
        target_file: Path of the log file to analyse. When omitted, the
            benchmark log path this script has always used is analysed,
            so calling ``main()`` with no arguments behaves as before.

    Returns:
        None. Prints a notice and returns early when no swap operation
        could be extracted (including when the file could not be read).
    """
    if target_file is None:
        target_file = "benchmarks/paper_eval/review_test/LongBench_DS_8B_GovReport/benchmark_1000_fc.log"

    print(f"파일 분석 중: {target_file}")
    print("=" * 60)

    operations = extract_swap_operations(target_file)

    # An empty result covers both "no matching lines" and "read error".
    if not operations:
        print("Swap 작업을 찾을 수 없습니다.")
        return

    analyze_swap_patterns(operations)

# Run the analysis only when executed as a script, not when imported.
if __name__ == "__main__":
    main()