import csv
from collections import defaultdict

input_file = 'langchain_to_label.csv'
output_file = 'filtered_langchain.csv'

# Upper bound on how many labeled rows are kept for any single id.
MAX_ROWS_PER_ID = 5


def filter_labeled_rows(rows, max_per_id=MAX_ROWS_PER_ID):
    """Return the labeled rows, keeping at most *max_per_id* per id.

    A row counts as labeled when its 'Is Bug' value contains something
    other than whitespace. Rows are kept in input order; once an id has
    reached *max_per_id* kept rows, later rows with that id are dropped.

    Args:
        rows: Iterable of mappings with 'Is Bug' and 'id' keys, such as
            the rows produced by ``csv.DictReader``.
        max_per_id: Maximum number of rows to keep for each distinct id.

    Returns:
        A list of the kept rows (the same mapping objects, not copies).

    Raises:
        KeyError: If a row has no 'Is Bug' or 'id' key at all.
    """
    id_counts = defaultdict(int)
    kept = []
    for row in rows:
        label = row['Is Bug']
        # csv.DictReader fills columns missing from a short line with None,
        # so guard before calling strip(); such rows count as unlabeled.
        if label is None or label.strip() == '':
            continue
        id_value = row['id']
        if id_counts[id_value] < max_per_id:
            kept.append(row)
            id_counts[id_value] += 1
    return kept


def main(input_path=input_file, output_path=output_file):
    """Filter the CSV at *input_path* and write the result to *output_path*.

    The output keeps the input's header and column order and contains only
    the rows selected by ``filter_labeled_rows``.

    Args:
        input_path: Path of the CSV file to read.
        output_path: Path of the CSV file to create or overwrite.

    Raises:
        ValueError: If the input file has no header row (e.g. it is empty).
    """
    # Explicit encoding so the result does not depend on the platform locale.
    # NOTE(review): assumes the CSV is UTF-8 encoded; adjust if it is not.
    with open(input_path, newline='', encoding='utf-8') as csvfile:
        reader = csv.DictReader(csvfile)
        header = reader.fieldnames  # Save header for writing out later
        if header is None:
            # Fail before the output file is opened so that an existing
            # output is not truncated by a run that cannot succeed.
            raise ValueError(f"No header row found in '{input_path}'.")
        filtered_rows = filter_labeled_rows(reader)

    # Write filtered rows to a new CSV file
    with open(output_path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=header)
        writer.writeheader()
        print(f"Writing {len(filtered_rows)} rows to '{output_path}'")
        writer.writerows(filtered_rows)

    print(f"Filtered data written to '{output_path}'.")


if __name__ == "__main__":
    main()