""" Examine Dataset """

import os
import re
import datasets
import click


# Matches a single assistant turn delimited by the chat-template header and
# end-of-turn tokens. DOTALL lets the turn body span multiple lines; the lazy
# `.+?` stops at the first end-of-turn token and requires at least one
# character of content.
_ASSISTANT_TURN_RE = re.compile(
    r"\<\|start_header_id\|\>assistant\<\|end_header_id\|\>(.+?)\<\|eot_id\|\>",
    re.DOTALL,
)


@click.command()
@click.option(
    '--dataset-path',
    type=click.Path(exists=True),
    required=True,
    help='Path to the dataset.',
)
def main(dataset_path: str) -> None:
    """Print every sample in the dataset that contains an assistant turn.

    Loads the dataset saved at ``dataset_path`` with
    ``datasets.load_from_disk`` and iterates over its ``'text'`` column.
    Each sample in which an assistant turn is found is printed to stdout
    between two 80-character ``=`` separator lines.

    Args:
        dataset_path: Path to an existing dataset on disk. The option is
            required, so a missing value is reported by click as a usage
            error instead of reaching the loader as ``None``.
    """
    dataset = datasets.load_from_disk(dataset_path)

    separator = '=' * 80
    for item in dataset['text']:
        # To inspect the opposite case (samples with a missing or
        # whitespace-only assistant turn), invert this check and also test
        # `match.group(1).strip() == ""`.
        if _ASSISTANT_TURN_RE.search(item) is None:
            continue

        print(separator)
        print(item)
        print(separator)
            
            
# Run the click command only when executed as a script, not on import.
if __name__ == "__main__":
    main()