import pandas as pd

# Build a reproducible 2,000-row subset of the source parquet file, label every
# row with data_source='deepscaler', and write the subset to a new parquet file.

# Full path literals are kept intact so the exact locations stay greppable.
INPUT_PARQUET = "/mnt/shared-storage-user/p1-shared/wangfuting/codes/project_tts_extrapolation/data/l1/deepscaler-add1k_8k_max9k.parquet"
OUTPUT_PARQUET = "/mnt/shared-storage-user/p1-shared/wangfuting/codes/project_tts_extrapolation/data/l1/deepscaler_qwen3_polaris_sampled2k.parquet"

SAMPLE_SIZE = 2000  # number of rows kept in the output
SAMPLE_SEED = 0     # fixed seed so reruns select the same rows

data = pd.read_parquet(INPUT_PARQUET)

# Quick visual check of the loaded frame before sampling.
print(data.head())

# Draw the seeded sample, then set the data_source column on every sampled row
# (the column is created if absent, overwritten if already present).
data = data.sample(n=SAMPLE_SIZE, random_state=SAMPLE_SEED).assign(
    data_source='deepscaler'
)

data.to_parquet(OUTPUT_PARQUET)