from datasets import load_dataset

# Exploration script: load the dataset into a pandas DataFrame, narrow it
# down, shuffle it, and print a sample of rows for manual inspection.

# Selects which inspection view runs:
#   True  -> named-formula view (reads formula_name_id / label / name / formula)
#   False -> formula-pair view  (reads formula1 / formula2 / label)
# NOTE(review): the two views read different columns, so they presumably
# target different datasets behind the same 'anonymized' id -- confirm.
INSPECT_NAMED_FORMULAS = True

# The print loops test this bound *after* printing a row, so rows with index
# 0 .. ROW_INDEX_CUTOFF + 1 are shown before the loop stops.
ROW_INDEX_CUTOFF = 1000

nmf = load_dataset('anonymized', split='all').to_pandas()

if INSPECT_NAMED_FORMULAS:
    # Alternative name filter kept for manual experiments:
    # nmf = nmf[nmf['formula_name_id'] == 'Factorial'].reset_index(drop=True)
    nmf = nmf[nmf['formula_name_id'] == 'Pythagorean Theorem'].reset_index(drop=True)

    # frac=1.0 returns every row in random order, i.e. a full shuffle.
    nmf = nmf.sample(frac=1.0).reset_index(drop=True)

    # Alternative content filter kept for manual experiments:
    # nmf = nmf[nmf['formula'].str.contains('sum') & nmf['formula'].str.contains('! =')].reset_index(drop=True)

    # Keep rows whose label is set and whose formula text begins with \forall.
    # NOTE(review): assumes 'label' is a boolean column so `&` yields a mask -- confirm.
    nmf = nmf[nmf['label'] & nmf['formula'].str.startswith(r'\forall')].reset_index(drop=True)
    print(nmf)

    # reset_index(drop=True) above makes `i` a 0-based position, which is what
    # the cutoff comparison relies on.
    for i, row in nmf.iterrows():
        print(row['formula_name_id'], row['label'], row['name'], row['formula'])

        if i > ROW_INDEX_CUTOFF:
            break

else:
    # Alternative name filter kept for manual experiments:
    # nmf = nmf[nmf['formula_name_id'] == 'Factorial'].reset_index(drop=True)

    # frac=1.0 returns every row in random order, i.e. a full shuffle.
    nmf = nmf.sample(frac=1.0).reset_index(drop=True)

    # Alternative content filter kept for manual experiments:
    # nmf = nmf[nmf['formula'].str.contains('sum') & nmf['formula'].str.contains('! =')].reset_index(drop=True)
    print(nmf)

    for i, row in nmf.iterrows():
        print(row['formula1'], ' & ', row['formula2'], row['label'])

        if i > ROW_INDEX_CUTOFF:
            break

    # Class balance over the whole (unfiltered) frame, not just the printed rows.
    print(nmf['label'].value_counts())
