import numpy as np
# Map the binary file read-only and view its contents as a flat array of
# unsigned 16-bit integers (presumably token ids, judging by the labels
# printed below -- confirm against whatever produced the file).
x = np.memmap(
    "/root/trainbin1/document1.bin",
    dtype=np.uint16,
    mode="r",
)

# Summary statistics: element count plus the largest and smallest value.
# The extrema are converted to plain Python ints so they print without a
# NumPy scalar wrapper.
token_count = x.shape[0]
max_id = int(x.max())
min_id = int(x.min())
print("tokens:", token_count, "max_id:", max_id, "min_id:", min_id)

# Optional: inspect the high percentiles (99.9th, 99.99th and the maximum)
# to see how the values are distributed near the top of the range.
high_percentiles = np.percentile(x, [99.9, 99.99, 100])
print(high_percentiles)
