import os

from datasets import load_from_disk, DatasetDict, Dataset

# Reorder the columns of every split of the locally stored dataset and push
# the reordered dataset to the Hugging Face Hub.

# Access tokens must never be committed: read the token from the environment.
# When HF_TOKEN is unset this is None, in which case push_to_hub falls back to
# the token cached by `huggingface-cli login`.
# NOTE(review): the token that used to be hard-coded here has been exposed in
# source control and should be revoked.
token = os.environ.get('HF_TOKEN')

dataset = load_from_disk('file://data/named_math_formulas_full_250')

# Target column order: the fixed leading columns, then every column of the
# 'test' split whose name contains 'strategy' but not 'count' (sorted
# alphabetically), and finally the strategy counter.
columns = [
    'name',
    'formula',
    'formula_name_id',
    'label',
    'is_text',
    'substituted',
    'substituted_var',
    'substituted_fnc',
]
columns += sorted(
    c for c in dataset['test'].column_names
    if 'strategy' in c and 'count' not in c
)
columns += ['strategy_count']

new_dataset = DatasetDict()
for split, ds in dataset.items():
    # Requested columns missing from this split are skipped; columns of the
    # split that are not listed in `columns` are dropped.
    available = set(ds.column_names)
    ordered = [col for col in columns if col in available]
    # select_columns keeps the original feature types and the Arrow-backed
    # storage, instead of copying every column into Python lists and letting
    # Dataset.from_dict re-infer the schema.
    new_dataset[split] = ds.select_columns(ordered)

new_dataset.push_to_hub('anonymized_ft', token=token)