lang_name	keep_rows	remove_rows	total_rows	keep_tokens	remove_tokens	total_tokens	keep_disk	remove_disk	total_disk	source
eng_Latn	1311089898	101082791	1412172689	1209768049692	93234552534	1303002602226	5273.79GB	1389.58GB	6663.36GB	fineweb_en, mala, New CC
rus_Cyrl	858533615	67210488	925744103	1135503823690	90178931423	1225682755113	7820.53GB	2070.72GB	9891.25GB	fineweb-2, mala, New CC
cmn_Hani	713970414	71188876	785159290	745876230028	73835166891	819711396919	2703.03GB	1488.30GB	4191.34GB	fineweb-2, New CC
deu_Latn	668616647	53652398	722269045	632323804701	51112294384	683436099085	2654.70GB	619.14GB	3273.84GB	fineweb-2, mala, New CC
spa_Latn	604454122	43332181	647786303	483748642332	34789270757	518537913089	2361.20GB	464.31GB	2825.52GB	fineweb-2, mala, New CC
fra_Latn	513533126	40315667	553848793	430864822247	33771457583	464636279830	2005.12GB	457.49GB	2462.62GB	fineweb-2, mala, New CC
jpn_Jpan	491465489	42468247	533933736	278140020404	22807354037	300947374441	1859.43GB	470.20GB	2329.63GB	fineweb-2, New CC
ita_Latn	311421021	25504599	336925620	250122965294	20688098340	270811063634	1204.02GB	272.65GB	1476.67GB	fineweb-2, mala, New CC
por_Latn	271475185	18673879	290149064	204642598869	14122468815	218765067684	999.51GB	210.32GB	1209.83GB	fineweb-2, mala, New CC
pol_Latn	223213297	15376792	238590089	180340459776	12593577913	192934037689	848.01GB	171.91GB	1019.93GB	fineweb-2, mala, New CC
nld_Latn	219161133	14027531	233188664	146160884683	9382137457	155543022140	688.83GB	148.09GB	836.91GB	fineweb-2, mala, New CC
ind_Latn	156915320	16208928	173124248	60968494074	5145594340	66114088414	378.92GB	60.38GB	439.30GB	fineweb-2, mala
tur_Latn	143311483	9984830	153296313	118402262943	8209060460	126611323403	576.36GB	135.41GB	711.77GB	fineweb-2, mala, New CC
vie_Latn	87769030	6189904	93958934	110109983350	7709656883	117819640233	531.66GB	108.21GB	639.86GB	fineweb-2, mala, New CC
fas_Arab	82798155	9490432	92288587	67578991262	7909008696	75487999958	485.58GB	113.12GB	598.70GB	fineweb-2, mala, New CC
kor_Hang	79219710	6162796	85382506	59069582058	4618041584	63687623642	313.45GB	62.12GB	375.57GB	fineweb-2, mala, New CC
swe_Latn	77323851	5078520	82402371	59213293115	3919398191	63132691306	250.87GB	68.22GB	319.09GB	fineweb-2, mala, New CC
hun_Latn	70788907	5180743	75969650	65618918098	4856773770	70475691868	297.63GB	81.93GB	379.56GB	fineweb-2, mala, New CC
ukr_Cyrl	67869004	4309161	72178165	53787399655	3411050099	57198449754	399.29GB	76.85GB	476.14GB	fineweb-2, mala, New CC
ell_Grek	67476934	5673174	73150108	57627729245	5033701328	62661430573	395.84GB	104.94GB	500.78GB	fineweb-2, mala, New CC
tha_Thai	55470733	4287430	59758163	46310113539	3586313457	49896426996	489.45GB	102.79GB	592.24GB	fineweb-2, mala, New CC
arb_Arab	53699784	4058820	57758604	25206494574	1919537653	27126032227	259.62GB	67.29GB	326.91GB	fineweb-2, mala
aze_Latn	51375448	6440800	57816248	3304892851	391998142	3696890993	39.02GB	9.96GB	48.99GB	mala
slv_Latn	50407115	4050353	54457468	11664896428	836484895	12501381323	64.47GB	11.78GB	76.25GB	fineweb-2, mala, New CC
cat_Latn	48833850	3777806	52611656	16492529523	1127785501	17620315024	90.31GB	13.27GB	103.58GB	fineweb-2, mala, New CC
fin_Latn	47804845	4089970	51894815	43434149418	3752485923	47186635341	188.26GB	53.66GB	241.92GB	fineweb-2, mala, New CC
ces_Latn	47538243	3205068	50743311	42196504149	2844242984	45040747133	182.19GB	45.39GB	227.58GB	mala, New CC
hbs_Latn	42983851	8053883	51037734	1533546836	287342257	1820889093	20.87GB	5.97GB	26.84GB	mala
fil_Latn	40145082	6324356	46469438	3465395327	477703550	3943098877	29.08GB	8.57GB	37.65GB	fineweb-2, mala
mal_Mlym	39099424	3879628	42979052	7004739037	558833199	7563572236	87.99GB	17.05GB	105.03GB	fineweb-2, mala, New CC
nob_Latn	38880874	4327512	43208386	24132117400	2810156027	26942273427	130.25GB	61.74GB	191.99GB	fineweb-2, mala
guj_Gujr	38815483	4542267	43357750	5066610791	461536476	5528147267	55.95GB	12.56GB	68.52GB	fineweb-2, mala, New CC
bul_Cyrl	37109490	2564551	39674041	32285330961	2225794143	34511125104	228.95GB	52.02GB	280.97GB	fineweb-2, mala, New CC
kan_Knda	34200429	2703744	36904173	4761010852	359209522	5120220374	64.09GB	10.36GB	74.45GB	fineweb-2, mala, New CC
hin_Deva	29149297	2467121	31616418	22078969777	1812839173	23891808950	204.39GB	43.26GB	247.65GB	fineweb-2, mala, New CC
tam_Taml	26549719	2897512	29447231	5880141624	460751525	6340893149	74.75GB	17.96GB	92.72GB	fineweb-2, mala, New CC
kaz_Cyrl	25783423	1665775	27449198	6368127023	432669049	6800796072	59.94GB	12.10GB	72.03GB	fineweb-2, mala, New CC
heb_Hebr	25236869	1610727	26847596	20735757088	1333874583	22069631671	137.70GB	26.77GB	164.47GB	fineweb-2, mala, New CC
ara_Arab	25144413	3242838	28387251	17213574044	2230033226	19443607270	142.24GB	66.99GB	209.23GB	mala, New CC
srp_Cyrl	25133722	1750095	26883817	6910169257	496071441	7406240698	56.20GB	7.92GB	64.12GB	fineweb-2, mala, New CC
est_Latn	24178713	2857229	27035942	2889535853	294199417	3183735270	24.37GB	8.30GB	32.67GB	mala, New CC
sqi_Latn	24157991	3250860	27408851	2375287198	237810779	2613097977	19.63GB	4.68GB	24.32GB	mala, New CC
isl_Latn	24060884	2229951	26290835	6324840132	561737628	6886577760	32.48GB	8.46GB	40.95GB	fineweb-2, mala, New CC
pan_Guru	24024934	3161192	27186126	2267588753	227604665	2495193418	24.86GB	8.00GB	32.85GB	mala, New CC
mlt_Latn	23368058	2083800	25451858	3242175100	322802170	3564977270	15.27GB	4.62GB	19.89GB	fineweb-2, mala, New CC
mkd_Cyrl	22606373	1893276	24499649	5286572157	396984521	5683556678	47.84GB	6.59GB	54.43GB	fineweb-2, mala, New CC
bos_Latn	21617841	1708944	23326785	11011860447	831587878	11843448325	55.61GB	9.93GB	65.55GB	fineweb-2, mala, New CC
kat_Geor	20267734	1298877	21566611	6159450048	413356344	6572806392	76.87GB	14.07GB	90.94GB	fineweb-2, mala, New CC
lit_Latn	20086404	1511808	21598212	17469370400	1327008316	18796378716	85.02GB	17.04GB	102.07GB	fineweb-2, mala, New CC
ben_Beng	19903823	1373977	21277800	12264843404	848747436	13113590840	133.78GB	28.28GB	162.05GB	fineweb-2, mala, New CC
hrv_Latn	19832012	1535666	21367678	15023164163	1191215105	16214379268	71.28GB	15.50GB	86.78GB	fineweb-2, mala, New CC
glg_Latn	19314836	1577639	20892475	4452701716	372719013	4825420729	26.45GB	4.19GB	30.64GB	fineweb-2, mala, New CC
ron_Latn	18277620	1416130	19693750	23419218925	1814233268	25233452193	103.32GB	18.75GB	122.08GB	mala, New CC
ceb_Latn	18142710	1824813	19967523	1905000074	184518567	2089518641	13.14GB	1.92GB	15.07GB	fineweb-2, mala, New CC
hye_Armn	16931618	1402451	18334069	4646434746	392682044	5039116790	38.46GB	10.02GB	48.48GB	fineweb-2, mala, New CC
msa_Latn	16895725	1509247	18404972	12271781872	1051426013	13323207885	62.58GB	31.87GB	94.45GB	mala, New CC
tgk_Cyrl	16597415	1037857	17635272	3459399965	241467427	3700867392	27.01GB	4.67GB	31.68GB	fineweb-2, mala, New CC
mar_Deva	15369095	1352852	16721947	4051127609	287284906	4338412515	48.89GB	6.67GB	55.56GB	fineweb-2, mala, New CC
bel_Cyrl	15223440	1063399	16286839	5300207513	353845465	5654052978	42.12GB	6.30GB	48.42GB	fineweb-2, mala, New CC
nep_Deva	13179513	1735050	14914563	3397388068	354948901	3752336969	53.76GB	13.18GB	66.94GB	mala, New CC
urd_Arab	12916548	1282876	14199424	5630362004	463494414	6093856418	40.38GB	7.76GB	48.14GB	fineweb-2, mala, New CC
slk_Latn	12785724	850416	13636140	10712942539	712569284	11425511823	49.82GB	9.32GB	59.14GB	mala, New CC
mon_Cyrl	11457963	1374100	12832063	2046997684	225169746	2272167430	23.80GB	7.35GB	31.14GB	mala, New CC
dan_Latn	11334787	645362	11980149	8911302729	506753046	9418055775	39.56GB	8.67GB	48.23GB	mala, New CC
eus_Latn	10878956	720917	11599873	2855972563	180733600	3036706163	17.27GB	2.78GB	20.05GB	fineweb-2, mala, New CC
azj_Latn	10372969	764572	11137541	6022895665	427970898	6450866563	50.72GB	9.30GB	60.02GB	fineweb-2, mala, New CC
swa_Latn	10315887	1779710	12095597	968630506	131703640	1100334146	8.27GB	2.41GB	10.68GB	mala, New CC
als_Latn	9944153	695212	10639365	7836141782	540493626	8376635408	20.64GB	3.54GB	24.18GB	fineweb-2, mala
sin_Sinh	9909180	1122278	11031458	2925072011	251401829	3176473840	30.48GB	7.12GB	37.60GB	fineweb-2, mala, New CC
lat_Latn	9860432	968127	10828559	1668306644	209536740	1877843384	8.31GB	3.12GB	11.43GB	fineweb-2, mala, New CC
tel_Telu	9811862	790372	10602234	3896863854	293324245	4190188099	44.54GB	8.60GB	53.13GB	fineweb-2, mala, New CC
afr_Latn	9380855	858541	10239396	3019920317	252812540	3272732857	14.95GB	2.87GB	17.82GB	fineweb-2, mala, New CC
ekk_Latn	9242101	772474	10014575	4786558837	401825098	5188383935	35.70GB	11.02GB	46.72GB	fineweb-2, mala
zsm_Latn	8669616	795543	9465159	4221713581	365482979	4587196560	29.38GB	8.32GB	37.70GB	fineweb-2, mala
ltz_Latn	8585994	1206703	9792697	1184897509	146264352	1331161861	6.31GB	1.78GB	8.09GB	fineweb-2, mala, New CC
som_Latn	7468883	716703	8185586	2202705680	193457169	2396162849	9.57GB	3.11GB	12.68GB	fineweb-2, mala, New CC
kir_Cyrl	6471465	468938	6940403	2309361964	183288823	2492650787	19.56GB	3.38GB	22.94GB	fineweb-2, mala, New CC
cym_Latn	6470707	515431	6986138	2006982648	141852600	2148835248	9.58GB	1.85GB	11.43GB	fineweb-2, mala, New CC
nor_Latn	6132979	733565	6866544	1270841389	150121421	1420962810	8.30GB	2.56GB	10.85GB	mala, New CC
uzb_Latn	6067357	715365	6782722	929536346	98711969	1028248315	8.16GB	2.55GB	10.70GB	mala, New CC
und_Kana	5828958	1112743	6941701	1127498758	219262371	1346761129	15.74GB	13.35GB	29.09GB	fineweb-2
mya_Mymr	5797901	449022	6246923	5283838392	404359403	5688197795	37.30GB	7.01GB	44.31GB	fineweb-2, mala, New CC
epo_Latn	5774232	456781	6231013	2379901847	177310694	2557212541	11.20GB	2.09GB	13.29GB	fineweb-2, mala, New CC
ary_Arab	5674014	465361	6139375	1381043534	114170897	1495214431	16.87GB	4.02GB	20.90GB	fineweb-2, mala
lvs_Latn	5511570	382807	5894377	2735089643	185991078	2921080721	20.10GB	6.38GB	26.48GB	fineweb-2, mala
hau_Latn	5483504	662278	6145782	438939432	49378753	488318185	3.00GB	1.02GB	4.02GB	mala
gle_Latn	5469893	428919	5898812	1646862879	134535091	1781397970	8.77GB	1.45GB	10.21GB	fineweb-2, mala, New CC
nno_Latn	5193977	553478	5747455	1352669317	124047788	1476717105	6.97GB	1.72GB	8.69GB	fineweb-2, mala, New CC
ory_Orya	5126879	444546	5571425	325737649	23327622	349065271	6.83GB	1.00GB	7.84GB	fineweb-2, mala
amh_Ethi	4864739	302322	5167061	1206165659	77954431	1284120090	9.57GB	1.45GB	11.02GB	fineweb-2, mala, New CC
khm_Khmr	4742933	344103	5087036	2232352688	158446037	2390798725	28.40GB	4.27GB	32.67GB	fineweb-2, mala, New CC
tat_Cyrl	4716122	390385	5106507	1289634062	103348827	1392982889	10.86GB	2.01GB	12.87GB	fineweb-2, mala, New CC
und_Bamu	4710727	1003273	5714000	199461487	42487126	241948613	74.20GB	18.13GB	92.33GB	fineweb-2
und_Copt	4396025	361856	4757881	218113278	17953855	236067133	8.34GB	820.70MB	9.14GB	fineweb-2
arz_Arab	4192689	347362	4540051	794229453	62860109	857089562	6.40GB	1.08GB	7.48GB	fineweb-2, mala, New CC
und_Tang	3940389	741813	4682202	209679616	39474037	249153653	21.14GB	7.14GB	28.28GB	fineweb-2
und_Xsux	3897670	694594	4592264	276929680	49350944	326280624	12.89GB	9.07GB	21.96GB	fineweb-2
lav_Latn	3761886	347112	4108998	2117953391	196445717	2314399108	13.00GB	6.85GB	19.86GB	mala, New CC
pus_Arab	3714572	493239	4207811	905769724	106283077	1012052801	7.14GB	2.20GB	9.33GB	mala, New CC
hbs_Cyrl	3468184	463549	3931733	131154368	17533630	148687998	2.30GB	519.49MB	2.81GB	mala, New CC
war_Latn	3428865	283720	3712585	137362551	11186470	148549021	1.72GB	154.06MB	1.87GB	fineweb-2, mala, New CC
und_Yiii	3389232	417382	3806614	232881682	28679247	261560929	24.04GB	5.81GB	29.85GB	fineweb-2
multi_Latn	3107041	394014	3501055	2394003291	303446934	2697450225	17.16GB	7.08GB	24.24GB	New CC
mlg_Latn	2848787	437742	3286529	288343925	41289304	329633229	2.55GB	730.41MB	3.26GB	mala, New CC
und_Hira	2784331	579376	3363707	361772656	75279266	437051922	4.53GB	3.76GB	8.30GB	fineweb-2
uzn_Cyrl	2607331	304118	2911449	396888383	30844213	427732596	5.95GB	1.37GB	7.32GB	fineweb-2, mala
hat_Latn	2579899	226905	2806804	464182180	41249393	505431573	2.42GB	523.00MB	2.93GB	fineweb-2, mala, New CC
zul_Latn	2469561	294213	2763774	333050580	38274466	371325046	2.00GB	613.05MB	2.60GB	fineweb-2, mala
kur_Latn	2408980	327926	2736906	482017657	51673963	533691620	3.17GB	993.86MB	4.14GB	mala
div_Thaa	2251318	263724	2515042	418221698	43981761	462203459	4.07GB	969.02MB	5.01GB	fineweb-2, mala, New CC
tgl_Latn	2243457	345689	2589146	369185190	35556642	404741832	2.56GB	638.68MB	3.18GB	mala, New CC
uzb_Cyrl	2223336	314253	2537589	194015771	27596487	221612258	2.76GB	1.06GB	3.82GB	mala
fry_Latn	2143363	232493	2375856	605324884	65895071	671219955	2.89GB	871.76MB	3.74GB	fineweb-2, mala, New CC
sna_Latn	2140911	181610	2322521	295326389	24539379	319865768	1.71GB	408.90MB	2.11GB	fineweb-2, mala
fao_Latn	2091383	163656	2255039	199427123	14185118	213612241	1.57GB	374.64MB	1.94GB	fineweb-2, mala
und_Laoo	2055385	364696	2420081	212140743	37641065	249781808	3.91GB	2.41GB	6.32GB	fineweb-2
sun_Latn	1993116	193823	2186939	275243524	25283531	300527055	1.59GB	518.40MB	2.10GB	fineweb-2, mala, New CC
snd_Arab	1906809	154845	2061654	1115520953	104995934	1220516887	4.91GB	1.75GB	6.66GB	fineweb-2, mala, New CC
und_Cyrl	1859952	427202	2287154	1318689379	302882407	1621571786	4.74GB	17.52GB	22.26GB	fineweb-2
und_Kits	1858513	315452	2173965	269537597	45749571	315287168	11.61GB	15.94GB	27.55GB	fineweb-2
bak_Cyrl	1853030	132427	1985457	401912595	27615015	429527610	3.60GB	699.52MB	4.28GB	fineweb-2, mala, New CC
asm_Beng	1819123	115517	1934640	380777691	23668706	404446397	4.19GB	865.12MB	5.03GB	fineweb-2, mala, New CC
cos_Latn	1785324	274663	2059987	228063884	35244405	263308289	1.02GB	553.13MB	1.56GB	mala
ckb_Arab	1781141	177877	1959018	841600318	76590473	918190791	6.04GB	1.41GB	7.45GB	fineweb-2, mala, New CC
und_Hluw	1714758	374917	2089675	70773026	15473909	86246935	2.97GB	3.21GB	6.19GB	fineweb-2
ast_Latn	1629413	144175	1773588	213119458	19079424	232198882	1.30GB	367.59MB	1.66GB	fineweb-2, mala, New CC
jpn_Japn	1603444	177396	1780840	148767026	17991990	166759016	5.64GB	2.01GB	7.65GB	mala
ibo_Latn	1590738	117637	1708375	233496425	16647241	250143666	1.35GB	425.41MB	1.76GB	fineweb-2, mala
und_Grek	1569549	224657	1794206	755835440	108186315	864021755	6.47GB	6.68GB	13.15GB	fineweb-2
mri_Latn	1534556	133721	1668277	354497489	28724714	383222203	1.59GB	450.64MB	2.03GB	fineweb-2, mala
ars_Arab	1530800	108785	1639585	461046240	32763858	493810098	4.54GB	1.72GB	6.26GB	fineweb-2
anp_Deva	1438448	140264	1578712	805494914	78543685	884038599	9.96GB	1.98GB	11.93GB	fineweb-2, mala
khk_Cyrl	1438065	128138	1566203	615036855	54802524	669839379	7.61GB	1.70GB	9.32GB	fineweb-2
und_Shrd	1407082	216594	1623676	130798763	20134027	150932790	5.64GB	2.18GB	7.83GB	fineweb-2
lao_Laoo	1395591	105796	1501387	628083421	49711455	677794876	7.12GB	1.27GB	8.39GB	fineweb-2, mala, New CC
und_Lina	1369182	271627	1640809	130393849	25868357	156262206	6.49GB	3.58GB	10.08GB	fineweb-2
und_Samr	1351547	158986	1510533	64056628	7535148	71591776	4.00GB	1.60GB	5.61GB	fineweb-2
ori_Orya	1335134	145907	1481041	128692048	11966672	140658720	2.01GB	734.47MB	2.73GB	mala
jav_Latn	1256017	122508	1378525	379692611	35262346	414954957	1.83GB	560.53MB	2.38GB	fineweb-2, mala, New CC
yid_Hebr	1246571	160660	1407231	287371557	36138475	323510032	2.65GB	1.21GB	3.86GB	mala, New CC
und_Cans	1231332	248047	1479379	106394586	21432772	127827358	3.25GB	2.58GB	5.82GB	fineweb-2
nya_Latn	1205461	138307	1343768	230593919	26287180	256881099	1.25GB	417.53MB	1.66GB	fineweb-2, mala
hmn_Latn	1203798	195179	1398977	173066894	28591695	201658589	1.01GB	518.71MB	1.51GB	mala
tir_Ethi	1200652	78323	1278975	125794757	8160310	133955067	1.07GB	277.10MB	1.34GB	fineweb-2, mala
uig_Arab	1187691	78596	1266287	513721480	37424973	551146453	3.46GB	894.22MB	4.34GB	fineweb-2, mala, New CC
wln_Latn	1177825	74381	1252206	53985428	3609088	57594516	496.29MB	74.59MB	570.88MB	fineweb-2, mala, New CC
und_Adlm	1122406	194295	1316701	43627839	7552232	51180071	1.02GB	814.39MB	1.82GB	fineweb-2
und_Egyp	1119452	190500	1309952	97414397	16577256	113991653	2.36GB	3.28GB	5.64GB	fineweb-2
und_Syrc	1117085	115880	1232965	42713980	4430903	47144883	19.26GB	4.04GB	23.29GB	fineweb-2
swh_Latn	1116479	82674	1199153	449922982	32709567	482632549	3.11GB	765.91MB	3.86GB	fineweb-2, mala
yor_Latn	1115090	108671	1223761	189616381	18770619	208387000	1.00GB	290.82MB	1.29GB	fineweb-2, mala, New CC
uzn_Latn	1030779	68064	1098843	466187028	30783081	496970109	3.75GB	984.28MB	4.72GB	fineweb-2
und_Mend	1025555	293719	1319274	16584104	4749688	21333792	852.01MB	1.92GB	2.75GB	fineweb-2
xho_Latn	1022752	88436	1111188	168588652	13928346	182516998	1.11GB	236.24MB	1.34GB	fineweb-2, mala
gla_Latn	1011881	115439	1127320	518470312	76338962	594809274	1.89GB	862.84MB	2.74GB	fineweb-2, mala, New CC
bre_Latn	980751	86361	1067112	134684418	11683477	146367895	722.44MB	220.74MB	943.18MB	fineweb-2, mala, New CC
sot_Latn	917368	78477	995845	223239399	17818098	241057497	1.01GB	270.04MB	1.28GB	fineweb-2, mala
nan_Latn	905480	86684	992164	26581741	2542373	29124114	461.57MB	90.68MB	552.25MB	fineweb-2, mala
tel_Latn	898416	92513	990929	204174363	21268208	225442571	804.44MB	424.30MB	1.20GB	fineweb-2, mala
bew_Latn	885967	99331	985298	370268275	41512966	411781241	2.65GB	740.55MB	3.37GB	fineweb-2
smo_Latn	883152	83255	966407	241447679	21174910	262622589	1.07GB	277.36MB	1.34GB	fineweb-2, mala
glk_Arab	876518	99662	976180	44947361	5295917	50243278	601.18MB	163.50MB	764.68MB	fineweb-2, mala
che_Cyrl	875249	117291	992540	118782781	15175063	133957844	1004.27MB	330.76MB	1.30GB	fineweb-2, mala, New CC
orm_Latn	859552	77396	936948	35455189	3192465	38647654	454.59MB	143.07MB	597.66MB	mala
zho_Hani	840529	65421	905950	578500034	46682125	625182159	2.48GB	935.49MB	3.40GB	mala
haw_Latn	808973	88123	897096	227682786	23607105	251289891	828.93MB	286.48MB	1.09GB	fineweb-2, mala
pnb_Arab	806696	71035	877731	133554258	11759081	145313339	840.98MB	470.54MB	1.28GB	fineweb-2, mala, New CC
oci_Latn	760653	59163	819816	123298619	10544506	133843125	673.94MB	184.72MB	858.66MB	fineweb-2, mala, New CC
und_Linb	735073	107674	842747	52969489	7759007	60728496	5.87GB	951.67MB	6.80GB	fineweb-2
chv_Cyrl	731681	60721	792402	188931289	16352295	205283584	1.02GB	345.08MB	1.36GB	fineweb-2, mala, New CC
kin_Latn	701701	67289	768990	197645683	16840972	214486655	1.33GB	152.85MB	1.48GB	fineweb-2, mala
srp_Latn	630883	54645	685528	158438190	13194623	171632813	739.10MB	199.78MB	938.88MB	mala
und_Brai	590104	125329	715433	57846795	12285768	70132563	1.80GB	1.21GB	3.02GB	fineweb-2
kaa_Cyrl	588714	48010	636724	1077791157	86210883	1164002040	3.33GB	591.84MB	3.91GB	fineweb-2, mala
lug_Latn	570875	40314	611189	36434285	2645136	39079421	328.94MB	81.26MB	410.20MB	fineweb-2, mala
und_Sgnw	567289	106447	673736	37335201	7005636	44340837	1.30GB	1.03GB	2.33GB	fineweb-2
pcm_Latn	563555	80446	644001	135969753	19602630	155572383	1.35GB	220.55MB	1.57GB	fineweb-2, mala
pbt_Arab	556450	36696	593146	273038535	18001871	291040406	2.24GB	459.13MB	2.69GB	fineweb-2, mala
min_Latn	548216	32976	581192	28263734	1778558	30042292	311.78MB	41.31MB	353.09MB	fineweb-2, mala
tuk_Latn	526599	48403	575002	211693996	23044261	234738257	1.06GB	351.17MB	1.40GB	fineweb-2, mala
lim_Latn	526453	43831	570284	49155865	4853085	54008950	322.41MB	67.00MB	389.41MB	fineweb-2, mala, New CC
und_Hung	520096	155234	675330	42343188	12638248	54981436	1.80GB	2.16GB	3.96GB	fineweb-2
gsw_Latn	519601	64762	584363	171127342	22150339	193277681	1.88GB	236.94MB	2.11GB	fineweb-2, mala, New CC
aze_Arab	481854	107193	589047	16648051	3703517	20351568	270.78MB	119.59MB	390.37MB	mala
kmr_Latn	473752	37033	510785	239776283	19235360	259011643	1.53GB	349.17MB	1.87GB	fineweb-2, mala, New CC
roh_Latn	467789	40875	508664	59955655	5001132	64956787	356.52MB	127.43MB	483.95MB	fineweb-2, mala, New CC
vec_Latn	451528	28943	480471	35505789	2411583	37917372	237.43MB	66.99MB	304.42MB	fineweb-2, mala
san_Deva	426600	30299	456899	186191638	14189057	200380695	1.27GB	843.45MB	2.10GB	fineweb-2, mala, New CC
und_Bali	422491	77078	499569	39617063	7227619	46844682	1.11GB	632.20MB	1.73GB	fineweb-2
und_Nshu	419712	89398	509110	38530917	8207025	46737942	947.06MB	1.19GB	2.12GB	fineweb-2
und_Modi	386819	67330	454149	52581442	9152364	61733806	15.32GB	6.91GB	22.23GB	fineweb-2
gmh_Latn	383582	47470	431052	769118138	95181833	864299971	5.13GB	1.32GB	6.46GB	fineweb-2
sco_Latn	382193	37494	419687	43052956	4462316	47515272	341.07MB	93.90MB	434.97MB	fineweb-2, mala
nds_Latn	379541	44239	423780	79449114	11680148	91129262	366.92MB	120.62MB	487.54MB	fineweb-2, mala, New CC
und_Lana	377578	110799	488377	47547716	13952718	61500434	656.28MB	1.91GB	2.55GB	fineweb-2
azb_Arab	376136	24159	400295	81100334	6512897	87613231	587.17MB	194.45MB	781.61MB	fineweb-2, mala, New CC
tsn_Latn	375822	23431	399253	24793693	1539868	26333561	196.99MB	39.40MB	236.40MB	fineweb-2, mala
und_Mong	364921	51360	416281	78042843	10983967	89026810	1.23GB	789.07MB	2.00GB	fineweb-2
sah_Cyrl	357019	24172	381191	110127024	7767196	117894220	998.26MB	193.37MB	1.16GB	mala, New CC
und_Ethi	351771	49199	400970	39746869	5559031	45305900	1.15GB	1.00GB	2.15GB	fineweb-2
rus_Latn	349613	47555	397168	77311596	10539553	87851149	720.03MB	463.00MB	1.16GB	mala
pri_Latn	348994	27203	376197	142266923	11088423	153355346	2.00GB	482.39MB	2.47GB	fineweb-2, New CC
und_Hebr	345200	46867	392067	17418774	2364906	19783680	522.83MB	439.74MB	962.57MB	fineweb-2
mon_Latn	344803	46679	391482	31562725	4272922	35835647	171.78MB	258.68MB	430.46MB	mala
pap_Latn	339801	22617	362418	127886685	8519807	136406492	647.29MB	212.76MB	860.05MB	fineweb-2, mala
tgk_Latn	337952	48394	386346	26438893	3785992	30224885	188.90MB	209.04MB	397.94MB	mala
plt_Latn	330574	28226	358800	118460554	8017834	126478388	907.24MB	181.18MB	1.06GB	fineweb-2, mala
lmo_Latn	324184	29246	353430	41372202	4087243	45459445	220.11MB	56.19MB	276.30MB	fineweb-2, mala, New CC
bod_Tibt	318522	34223	352745	252055519	28333902	280389421	3.14GB	952.50MB	4.07GB	mala, New CC
und_Saur	315779	73823	389602	15262640	3568108	18830748	380.09MB	466.42MB	846.50MB	fineweb-2
yue_Hani	300490	34038	334528	9022287	1033780	10056067	754.23MB	153.57MB	907.80MB	fineweb-2, mala, New CC
bar_Latn	270311	30793	301104	92484503	12459866	104944369	303.67MB	135.77MB	439.44MB	fineweb-2, mala
und_Thaa	262004	37561	299565	7676020	1100437	8776457	373.46MB	251.02MB	624.48MB	fineweb-2
und_Dupl	258897	53065	311962	14141364	2898494	17039858	717.72MB	479.65MB	1.17GB	fineweb-2
arg_Latn	258196	22522	280718	29968199	3050366	33018565	198.25MB	41.56MB	239.80MB	fineweb-2, mala, New CC
pms_Latn	258133	20250	278383	23550682	1855427	25406109	164.20MB	37.24MB	201.44MB	fineweb-2, mala, New CC
hif_Latn	254946	37466	292412	220188883	38738091	258926974	742.93MB	838.96MB	1.54GB	fineweb-2, mala
und_Thai	254351	47642	301993	47880654	8968434	56849088	828.46MB	310.74MB	1.11GB	fineweb-2
und_Runr	252177	39003	291180	154681829	23923892	178605721	1.16GB	3.05GB	4.21GB	fineweb-2
und_Vaii	243465	93267	336732	71284748	27307886	98592634	489.53MB	1.75GB	2.23GB	fineweb-2
vol_Latn	241216	23733	264949	12263160	1277740	13540900	120.59MB	26.50MB	147.09MB	fineweb-2, mala, New CC
und_Glag	237678	72074	309752	20382745	6180908	26563653	454.53MB	907.86MB	1.33GB	fineweb-2
nrm_Latn	234986	31988	266974	71123343	9679192	80802535	623.95MB	223.13MB	847.08MB	fineweb-2, mala
aeb_Arab	230690	32194	262884	51787621	7227234	59014855	611.71MB	222.12MB	833.83MB	fineweb-2
kat_Latn	229636	46985	276621	37422991	7656984	45079975	235.88MB	348.49MB	584.38MB	mala
ido_Latn	222870	22616	245486	15648562	1484861	17133423	125.75MB	34.15MB	159.90MB	fineweb-2, mala, New CC
kal_Latn	220324	17347	237671	76084692	6029455	82114147	353.94MB	192.91MB	546.85MB	fineweb-2, mala
pam_Latn	219651	22527	242178	21423196	2448261	23871457	123.68MB	35.43MB	159.11MB	fineweb-2, mala
und_Khmr	216989	36249	253238	10970789	1832720	12803509	451.42MB	398.62MB	850.04MB	fineweb-2
lus_Latn	206911	16421	223332	66594095	5158458	71752553	369.22MB	108.92MB	478.14MB	fineweb-2, mala
und_Mymr	204739	27296	232035	5634372	751179	6385551	270.03MB	237.63MB	507.65MB	fineweb-2
und_Tibt	201485	32842	234327	15437322	2516279	17953601	925.56MB	481.84MB	1.37GB	fineweb-2
und_Dsrt	197996	37900	235896	4469212	855487	5324699	237.30MB	536.84MB	774.15MB	fineweb-2
und_Geor	196346	49504	245850	22222812	5602956	27825768	357.20MB	600.63MB	957.84MB	fineweb-2
new_Deva	187269	16225	203494	23861964	2065620	25927584	288.82MB	85.41MB	374.23MB	fineweb-2, mala, New CC
und_Mroo	186137	22853	208990	6423841	788688	7212529	2.27GB	319.84MB	2.58GB	fineweb-2
sme_Latn	184427	14876	199303	42270878	3525619	45796497	304.03MB	88.07MB	392.11MB	fineweb-2, mala
und_Bopo	181708	24447	206155	30630500	4121028	34751528	3.22GB	849.42MB	4.05GB	fineweb-2
nso_Latn	175980	9656	185636	18891826	1076766	19968592	106.63MB	30.80MB	137.43MB	fineweb-2, mala
und_Armn	168059	46687	214746	33054778	9182658	42237436	331.08MB	491.86MB	822.94MB	fineweb-2
und_Mtei	166923	19644	186567	48489481	5706387	54195868	758.98MB	544.46MB	1.27GB	fineweb-2
scn_Latn	162546	10711	173257	18069237	1484413	19553650	119.49MB	24.28MB	143.76MB	fineweb-2, mala, New CC
ina_Latn	159802	16993	176795	13619366	1492374	15111740	99.48MB	26.72MB	126.20MB	fineweb-2, mala, New CC
lld_Latn	154215	24985	179200	8000831	1248359	9249190	86.31MB	16.03MB	102.33MB	fineweb-2, mala
und_Khar	153370	40038	193408	6754817	1763378	8518195	238.70MB	173.93MB	412.63MB	fineweb-2
hyw_Armn	142710	12895	155605	60770654	5522295	66292949	823.68MB	166.11MB	989.79MB	fineweb-2, mala
und_Deva	141127	26074	167201	35957610	6643369	42600979	244.13MB	1.43GB	1.67GB	fineweb-2
abk_Cyrl	139722	12856	152578	7725555	671844	8397399	95.66MB	13.90MB	109.56MB	fineweb-2, mala
und_Brah	138030	22724	160754	7853310	1292897	9146207	261.03MB	232.46MB	493.50MB	fineweb-2
bpy_Beng	135664	9499	145163	9299467	766180	10065647	135.33MB	26.96MB	162.29MB	fineweb-2, mala, New CC
bew_Cyrl	133830	13490	147320	3369817	339676	3709493	70.69MB	15.08MB	85.77MB	mala
lin_Latn	133642	8675	142317	16035104	1372183	17407287	110.27MB	30.77MB	141.04MB	fineweb-2, mala
und_Bhks	131896	27029	158925	3931080	805583	4736663	182.11MB	147.47MB	329.58MB	fineweb-2
oss_Cyrl	128058	13975	142033	84795639	9561587	94357226	372.34MB	159.28MB	531.62MB	fineweb-2, mala, New CC
tgk_Arab	127771	14974	142745	11608256	1360418	12968674	99.29MB	52.94MB	152.23MB	mala
szl_Latn	127603	10328	137931	8515961	738212	9254173	85.82MB	12.22MB	98.04MB	fineweb-2, mala
mww_Latn	122304	10216	132520	98369197	8216736	106585933	511.63MB	99.37MB	611.00MB	fineweb-2
sdh_Arab	120041	14202	134243	35259623	4502397	39762020	444.91MB	130.65MB	575.56MB	fineweb-2, mala
und_Hmnp	118870	12334	131204	6826948	708367	7535315	416.07MB	144.78MB	560.85MB	fineweb-2
srd_Latn	118783	8139	126922	15381237	1228699	16609936	114.22MB	23.06MB	137.28MB	fineweb-2, mala
mhr_Cyrl	118768	12583	131351	30711484	3171784	33883268	265.90MB	71.78MB	337.69MB	fineweb-2, mala, New CC
ydd_Hebr	117784	7278	125062	73708154	4554535	78262689	838.91MB	115.11MB	954.03MB	fineweb-2, mala
diq_Latn	117095	11778	128873	9746257	962884	10709141	71.95MB	15.59MB	87.53MB	fineweb-2, mala, New CC
und_Telu	115913	30828	146741	9004468	2394811	11399279	390.34MB	407.21MB	797.55MB	fineweb-2
que_Latn	114283	23930	138213	4282918	896825	5179743	55.08MB	32.27MB	87.35MB	mala, New CC, New CC
run_Latn	114035	9287	123322	24631436	1972941	26604377	208.43MB	37.51MB	245.94MB	fineweb-2, mala
hsb_Latn	112762	9949	122711	25096207	2043219	27139426	146.00MB	22.71MB	168.70MB	fineweb-2, mala, New CC
wol_Latn	108941	11076	120017	11755874	1370150	13126024	91.55MB	28.37MB	119.92MB	fineweb-2, mala
rmy_Latn	108228	21113	129341	284557286	55705068	340262354	2.37GB	94.36MB	2.46GB	fineweb-2, mala
und_Phag	107755	17582	125337	3409789	556363	3966152	135.12MB	88.99MB	224.10MB	fineweb-2
und_Merc	107519	38040	145559	7610200	2692473	10302673	205.45MB	450.35MB	655.81MB	fineweb-2
urd_Latn	106750	12604	119354	139191270	16434349	155625619	298.21MB	133.79MB	432.00MB	fineweb-2
kiu_Latn	106482	10355	116837	36528860	3761474	40290334	276.25MB	184.75MB	461.00MB	fineweb-2, mala
cak_Latn	106284	6639	112923	6079399	438745	6518144	63.11MB	10.35MB	73.46MB	fineweb-2, mala
ilo_Latn	106182	7825	114007	28611987	2058380	30670367	137.03MB	35.87MB	172.90MB	fineweb-2, mala, New CC
und_Kali	105868	24327	130195	1388215	318992	1707207	100.34MB	87.21MB	187.55MB	fineweb-2
und_Plrd	104310	21074	125384	5466895	1104490	6571385	204.59MB	214.81MB	419.40MB	fineweb-2
und_Orya	104035	26521	130556	10140981	2585177	12726158	285.55MB	369.69MB	655.24MB	fineweb-2
und_Lisu	101469	20052	121521	24001781	4743160	28744941	194.77MB	502.77MB	697.54MB	fineweb-2
und_Hmng	101019	23337	124356	5369264	1240385	6609649	146.11MB	187.86MB	333.96MB	fineweb-2
acm_Arab	98441	9971	108412	15262277	1535376	16797653	208.21MB	60.02MB	268.23MB	fineweb-2, mala
und_Gran	97962	21568	119530	3569694	785929	4355623	129.01MB	232.60MB	361.61MB	fineweb-2
und_Nkoo	97035	25738	122773	3610543	957676	4568219	1.98GB	220.96MB	2.19GB	fineweb-2
und_Taml	96473	16678	113151	4896337	846465	5742802	119.23MB	148.10MB	267.34MB	fineweb-2
und_Gonm	94819	16284	111103	2831994	486360	3318354	101.94MB	135.58MB	237.51MB	fineweb-2
xmf_Geor	93457	6296	99753	9773908	648813	10422721	131.31MB	29.63MB	160.94MB	fineweb-2, mala, New CC
und_Cher	93115	25902	119017	8554417	2379600	10934017	231.06MB	655.93MB	886.99MB	fineweb-2
grc_Grek	91727	9095	100822	66089432	6641824	72731256	646.19MB	253.87MB	900.07MB	fineweb-2, mala
und_Tnsa	89545	17934	107479	3277073	656329	3933402	93.93MB	194.59MB	288.52MB	fineweb-2
vls_Latn	88705	8033	96738	10668200	1372722	12040922	61.92MB	18.12MB	80.03MB	fineweb-2, mala
und_Cprt	88193	14110	102303	7868259	1258842	9127101	135.76MB	81.93MB	217.69MB	fineweb-2
crh_Latn	87247	6536	93783	17309747	1544043	18853790	142.38MB	56.48MB	198.86MB	fineweb-2, mala
tso_Latn	87143	6899	94042	15833225	1217605	17050830	74.71MB	33.90MB	108.61MB	fineweb-2, mala
und_Gujr	86094	10274	96368	1361854	162516	1524370	138.38MB	46.91MB	185.29MB	fineweb-2
mwl_Latn	83967	6888	90855	14435707	1753945	16189652	75.27MB	26.54MB	101.81MB	fineweb-2, mala, New CC
nav_Latn	83465	10127	93592	4671502	568488	5239990	54.17MB	10.61MB	64.78MB	fineweb-2, mala
grn_Latn	81994	7378	89372	5116640	460393	5577033	48.16MB	24.51MB	72.67MB	mala, New CC
tat_Latn	81260	8881	90141	38663467	4101951	42765418	178.65MB	41.96MB	220.62MB	fineweb-2, mala
mui_Latn	79139	9696	88835	21543715	2639506	24183221	167.47MB	61.77MB	229.24MB	fineweb-2
und_Cari	77735	18089	95824	1726601	401781	2128382	85.23MB	72.49MB	157.72MB	fineweb-2
hin_Latn	77143	7901	85044	142599949	14552577	157152526	342.59MB	91.85MB	434.44MB	fineweb-2, mala
udm_Cyrl	75222	5119	80341	23245930	1576472	24822402	226.83MB	58.13MB	284.96MB	fineweb-2, mala
mzn_Arab	72131	7934	80065	3760565	449500	4210065	47.86MB	12.84MB	60.70MB	fineweb-2, mala, New CC
bcl_Latn	70457	5815	76272	10145284	851937	10997221	66.15MB	13.71MB	79.86MB	fineweb-2, mala
mvf_Mong	70427	10110	80537	41615108	5972648	47587756	636.66MB	224.38MB	861.04MB	mala
ban_Latn	70055	5234	75289	12540147	1293016	13833163	96.79MB	21.10MB	117.88MB	fineweb-2, mala
und_Diak	68418	22400	90818	2866590	938519	3805109	55.69MB	89.99MB	145.68MB	fineweb-2
und_Marc	67800	11889	79689	2340996	410503	2751499	63.43MB	90.93MB	154.35MB	fineweb-2
und_Mani	65940	9565	75505	6265468	908844	7174312	122.44MB	133.85MB	256.30MB	fineweb-2
und_Talu	65754	11936	77690	1271390	230789	1502179	74.85MB	59.32MB	134.17MB	fineweb-2
cnh_Latn	65675	5138	70813	30804735	2466938	33271673	180.66MB	25.01MB	205.68MB	fineweb-2, mala
und_Vith	65142	12134	77276	2493654	464493	2958147	118.64MB	90.85MB	209.49MB	fineweb-2
und_Nagm	63572	11939	75511	1030094	193454	1223548	55.51MB	70.45MB	125.96MB	fineweb-2
tyv_Cyrl	63103	4696	67799	21300318	1896312	23196630	192.23MB	47.84MB	240.07MB	fineweb-2, mala, New CC
afb_Arab	61990	10144	72134	1060404	173523	1233927	27.95MB	5.61MB	33.56MB	mala
kaa_Latn	61847	7173	69020	64456731	7475675	71932406	226.33MB	32.25MB	258.58MB	mala
zea_Latn	60735	5239	65974	19632548	1695080	21327628	113.86MB	46.10MB	159.96MB	fineweb-2, mala
und_Ahom	60214	9688	69902	2339091	376343	2715434	121.62MB	67.40MB	189.03MB	fineweb-2
cbk_Latn	60119	9090	69209	13271585	2065269	15336854	94.09MB	48.02MB	142.11MB	fineweb-2, mala
und_Mlym	58978	14241	73219	3230645	780080	4010725	57.12MB	544.80MB	601.92MB	fineweb-2
und_Java	58521	13319	71840	2180637	496299	2676936	63.47MB	110.75MB	174.22MB	fineweb-2
frp_Latn	57972	6385	64357	19131421	2130272	21261693	154.90MB	47.43MB	202.33MB	fineweb-2, mala
kur_Arab	56418	10636	67054	3504165	660610	4164775	38.61MB	31.84MB	70.45MB	mala
frr_Latn	55114	5153	60267	4698958	470344	5169302	36.52MB	7.03MB	43.55MB	fineweb-2, mala
und_Beng	54181	11961	66142	6484052	1431419	7915471	102.46MB	112.54MB	215.00MB	fineweb-2
ful_Latn	53792	12583	66375	2547970	596019	3143989	28.50MB	10.93MB	39.44MB	mala
lij_Latn	53397	3953	57350	27054603	2396525	29451128	218.66MB	22.80MB	241.46MB	fineweb-2, mala
mai_Deva	52579	3499	56078	19027256	1420056	20447312	182.32MB	50.34MB	232.66MB	fineweb-2, mala, New CC
pfl_Latn	51892	5496	57388	7996388	921961	8918349	74.44MB	22.93MB	97.38MB	fineweb-2, mala
apc_Arab	51274	5363	56637	2522964	290251	2813215	46.65MB	6.83MB	53.49MB	fineweb-2, mala
pon_Latn	49134	3576	52710	2071503	150765	2222268	21.82MB	7.31MB	29.13MB	mala
und_Palm	48994	5324	54318	424134	46089	470223	37.58MB	41.79MB	79.37MB	fineweb-2
und_Wara	46804	9119	55923	1471826	286761	1758587	55.77MB	50.31MB	106.09MB	fineweb-2
eml_Latn	46134	4841	50975	13764340	2077991	15842331	47.03MB	19.13MB	66.17MB	fineweb-2, mala, New CC
uzs_Arab	45920	4858	50778	8678772	935248	9614020	99.87MB	24.70MB	124.57MB	fineweb-2, mala
mam_Latn	45462	3688	49150	3549951	291438	3841389	28.26MB	9.65MB	37.91MB	fineweb-2, mala
gom_Latn	45419	3473	48892	10230352	1133421	11363773	36.47MB	21.09MB	57.56MB	fineweb-2, mala
hil_Latn	45213	4657	49870	23214327	2422795	25637122	161.25MB	27.80MB	189.05MB	fineweb-2, mala
nde_Latn	44632	6044	50676	4595249	506078	5101327	57.69MB	10.98MB	68.67MB	fineweb-2, mala
bak_Latn	42407	8888	51295	728901	152769	881670	18.37MB	4.56MB	22.93MB	mala
bjn_Latn	41994	3220	45214	3315251	266810	3582061	28.65MB	8.08MB	36.73MB	fineweb-2, mala
ven_Latn	41238	3067	44305	4648005	345686	4993691	21.43MB	10.73MB	32.16MB	mala
nap_Latn	41217	4097	45314	2442791	242815	2685606	20.59MB	4.60MB	25.19MB	mala
tet_Latn	40816	3935	44751	35942260	3464672	39406932	149.34MB	25.09MB	174.43MB	fineweb-2, mala
gaz_Latn	40698	2770	43468	14095644	959382	15055026	123.81MB	21.72MB	145.53MB	fineweb-2
tpi_Latn	40586	2795	43381	3966804	367319	4334123	30.83MB	4.45MB	35.29MB	fineweb-2, mala
dag_Latn	40538	5678	46216	29890540	4345826	34236366	190.15MB	75.77MB	265.92MB	fineweb-2, mala
inh_Cyrl	40204	4147	44351	15293973	1522399	16816372	157.75MB	57.47MB	215.22MB	fineweb-2, mala
und_Khoj	39854	5234	45088	892456	117205	1009661	41.07MB	38.34MB	79.41MB	fineweb-2
hbo_Hebr	39761	5197	44958	33872875	4427387	38300262	247.44MB	173.48MB	420.92MB	fineweb-2
sat_Olck	39623	2307	41930	6324645	386983	6711628	75.66MB	13.21MB	88.87MB	fineweb-2, mala
tzo_Latn	38607	2457	41064	3926713	300646	4227359	27.84MB	11.50MB	39.34MB	fineweb-2, mala
vep_Latn	38512	2923	41435	3838804	285848	4124652	34.41MB	6.61MB	41.02MB	fineweb-2, mala
glv_Latn	38448	2950	41398	3949611	319469	4269080	26.32MB	7.48MB	33.80MB	fineweb-2, mala
bam_Latn	38066	3385	41451	5501664	600716	6102380	37.48MB	17.94MB	55.42MB	fineweb-2, mala
twi_Latn	37805	2319	40124	7226865	482889	7709754	49.99MB	7.59MB	57.58MB	fineweb-2, mala
bik_Latn	37637	3390	41027	2910074	262113	3172187	20.78MB	11.72MB	32.50MB	mala
ajp_Arab	37428	4684	42112	701160	87748	788908	17.16MB	2.77MB	19.93MB	mala
ewe_Latn	37020	2815	39835	10693387	812000	11505387	37.90MB	20.88MB	58.78MB	fineweb-2, mala
myv_Cyrl	36593	3431	40024	9871024	1167799	11038823	79.17MB	26.33MB	105.50MB	fineweb-2, mala
wuu_Hani	36438	5032	41470	12210907	1692337	13903244	62.58MB	26.04MB	88.62MB	fineweb-2, mala, New CC
ace_Latn	36012	2390	38402	4727614	340911	5068525	29.79MB	12.53MB	42.32MB	fineweb-2, mala
acr_Latn	35673	4808	40481	1739875	207674	1947549	19.82MB	3.46MB	23.28MB	fineweb-2, mala
poh_Latn	35582	4130	39712	2238300	255200	2493500	22.74MB	3.77MB	26.51MB	fineweb-2, mala
ile_Latn	35514	3184	38698	1853629	166186	2019815	18.41MB	3.36MB	21.77MB	mala
und_Rohg	35210	5319	40529	534335	80719	615054	35.06MB	39.16MB	74.22MB	fineweb-2
rue_Cyrl	35072	3428	38500	2501407	244491	2745898	28.62MB	5.87MB	34.49MB	mala
und_Sidd	34753	8407	43160	3029254	732798	3762052	43.93MB	89.12MB	133.05MB	fineweb-2
kha_Latn	34496	3081	37577	14893054	1332421	16225475	87.53MB	13.65MB	101.18MB	fineweb-2, mala
und_Yezi	33923	3346	37269	96605	9528	106133	28.00MB	13.65MB	41.64MB	fineweb-2
krc_Cyrl	32684	2481	35165	10827923	987011	11814934	72.47MB	25.29MB	97.76MB	fineweb-2, mala, New CC
hui_Latn	32666	2734	35400	2819297	252731	3072028	19.53MB	11.00MB	30.53MB	fineweb-2, mala
und_Ougr	32342	6131	38473	442158	83819	525977	29.59MB	36.19MB	65.79MB	fineweb-2
ksh_Latn	32305	3424	35729	4186698	495252	4681950	32.11MB	9.67MB	41.78MB	fineweb-2, mala
und_Avst	32162	6620	38782	1749449	360094	2109543	49.25MB	51.32MB	100.57MB	fineweb-2
fur_Latn	32091	2675	34766	4066701	358103	4424804	31.76MB	7.46MB	39.22MB	fineweb-2, mala
und_Ital	32065	5059	37124	519268	81926	601194	32.71MB	27.88MB	60.59MB	fineweb-2
und_Wcho	31939	6507	38446	1477611	301036	1778647	55.55MB	71.09MB	126.64MB	fineweb-2
lfn_Latn	31799	3351	35150	4742630	458177	5200807	30.18MB	6.90MB	37.08MB	fineweb-2, mala
gor_Latn	31302	4030	35332	1760353	220314	1980667	16.33MB	5.28MB	21.61MB	fineweb-2, mala
gag_Latn	31258	3226	34484	33716296	3425581	37141877	108.32MB	23.54MB	131.86MB	fineweb-2, mala
und_Kthi	31065	5442	36507	763516	133753	897269	29.36MB	34.08MB	63.44MB	fineweb-2
und_Tavt	30945	3628	34573	670822	78647	749469	27.94MB	14.27MB	42.21MB	fineweb-2
chk_Latn	30741	2225	32966	2432676	225568	2658244	18.95MB	6.99MB	25.95MB	fineweb-2, mala
und_Takr	30701	5285	35986	1731217	298019	2029236	29.46MB	43.48MB	72.94MB	fineweb-2
kek_Latn	30411	1879	32290	2075679	134222	2209901	18.46MB	5.77MB	24.23MB	fineweb-2, mala
hmo_Latn	30343	2403	32746	2995843	220424	3216267	21.35MB	7.59MB	28.94MB	fineweb-2, mala
ssw_Latn	30012	1701	31713	3105097	181161	3286258	24.68MB	5.27MB	29.96MB	fineweb-2, mala
fon_Latn	29709	2048	31757	4767639	309917	5077556	18.15MB	10.77MB	28.92MB	fineweb-2, mala
tls_Latn	29613	5307	34920	508597	91146	599743	11.48MB	2.52MB	14.00MB	mala
csb_Latn	29196	2153	31349	4952890	317141	5270031	36.53MB	13.36MB	49.89MB	fineweb-2, mala
quh_Latn	28826	2700	31526	1032045	93702	1125747	14.62MB	5.52MB	20.14MB	fineweb-2, mala
meu_Latn	28594	2808	31402	2417774	226275	2644049	16.99MB	8.97MB	25.96MB	fineweb-2, mala
ksd_Latn	28462	2852	31314	3076511	315218	3391729	15.06MB	10.12MB	25.18MB	fineweb-2, mala
shn_Mymr	28217	2174	30391	11791880	906211	12698091	145.02MB	28.45MB	173.48MB	fineweb-2, mala
kab_Latn	28035	2369	30404	2792303	258299	3050602	24.93MB	4.17MB	29.10MB	fineweb-2, mala
tbz_Latn	27913	2460	30373	1909269	173142	2082411	17.06MB	7.32MB	24.38MB	fineweb-2, mala
ext_Latn	27668	2861	30529	3209746	436674	3646420	22.15MB	5.46MB	27.61MB	fineweb-2, mala
rop_Latn	27540	2923	30463	2688199	466485	3154684	21.71MB	7.01MB	28.71MB	fineweb-2, mala
bqc_Latn	27370	2265	29635	1041082	83956	1125038	14.16MB	2.11MB	16.27MB	fineweb-2, mala
und_Tfng	27355	3102	30457	666338	75561	741899	22.20MB	21.44MB	43.64MB	fineweb-2
knv_Latn	26665	3318	29983	1197974	144423	1342397	17.77MB	2.36MB	20.13MB	fineweb-2, mala
aln_Latn	26646	3766	30412	1801105	222804	2023909	17.42MB	4.66MB	22.08MB	fineweb-2, mala
npi_Deva	26320	2747	29067	645216	67340	712556	18.21MB	2.02MB	20.24MB	mala
und_Tale	26174	2802	28976	220838	23641	244479	22.70MB	16.06MB	38.75MB	fineweb-2
kbd_Cyrl	26073	2291	28364	19125862	1692000	20817862	103.60MB	22.88MB	126.49MB	fineweb-2, mala
rug_Latn	25866	3181	29047	1210636	126481	1337117	14.25MB	1.70MB	15.95MB	fineweb-2, mala
kom_Cyrl	25800	2957	28757	2002103	228055	2230158	24.22MB	14.25MB	38.48MB	mala, New CC
wal_Latn	25721	2348	28069	1378945	98215	1477160	17.60MB	3.46MB	21.06MB	fineweb-2, mala
kpg_Latn	25588	2837	28425	836396	92733	929129	11.91MB	1.36MB	13.28MB	mala
dzo_Tibt	24930	3261	28191	20714733	2721673	23436406	176.75MB	60.40MB	237.15MB	fineweb-2, mala
und_Elba	24862	4614	29476	394512	73215	467727	23.07MB	18.31MB	41.38MB	fineweb-2
und_Zanb	24457	4757	29214	327394	63679	391073	24.87MB	38.17MB	63.04MB	fineweb-2
fij_Latn	23954	1393	25347	14647245	847483	15494728	69.13MB	13.98MB	83.11MB	fineweb-2, mala
hac_Arab	23894	2545	26439	9787434	1042480	10829914	95.26MB	53.17MB	148.43MB	fineweb-2
tuc_Latn	23670	1457	25127	1667545	104822	1772367	13.39MB	4.72MB	18.11MB	fineweb-2, mala
mzh_Latn	23500	3422	26922	586804	87228	674032	10.84MB	1.72MB	12.56MB	fineweb-2, mala
pan_Latn	23499	3154	26653	19854767	2664876	22519643	54.19MB	16.32MB	70.51MB	fineweb-2
tum_Latn	23369	2361	25730	2515195	250726	2765921	13.87MB	8.07MB	21.94MB	fineweb-2, mala
sgs_Latn	23067	2441	25508	1004068	80951	1085019	14.22MB	3.07MB	17.28MB	fineweb-2, mala
iba_Latn	22540	1636	24176	10715907	774734	11490641	66.08MB	18.55MB	84.63MB	fineweb-2, mala
und_Sogo	22286	3876	26162	146127	25414	171541	16.99MB	19.14MB	36.13MB	fineweb-2
und_Soyo	22207	4912	27119	598892	132469	731361	23.88MB	35.07MB	58.95MB	fineweb-2
jbo_Latn	22173	2028	24201	3475872	319165	3795037	13.37MB	6.57MB	19.94MB	fineweb-2, mala, New CC
pcd_Latn	21506	1855	23361	1387314	119662	1506976	11.71MB	2.67MB	14.37MB	mala
kjh_Cyrl	21301	1847	23148	1031763	83608	1115371	16.03MB	6.33MB	22.36MB	fineweb-2, mala
und_Dogr	21286	3820	25106	1281296	229942	1511238	28.56MB	22.79MB	51.34MB	fineweb-2
zlm_Latn	20519	6988	27507	449459	153069	602528	9.13MB	3.43MB	12.55MB	mala
und_Kawi	20279	4104	24383	396571	80256	476827	19.93MB	23.17MB	43.10MB	fineweb-2
ixl_Latn	19330	3424	22754	838727	123333	962060	10.83MB	1.94MB	12.77MB	fineweb-2, mala
und_Phli	19162	2881	22043	41161	6188	47349	16.71MB	7.25MB	23.96MB	fineweb-2
ndo_Latn	18648	2347	20995	3209470	232454	3441924	29.62MB	3.61MB	33.23MB	fineweb-2, mala
suz_Deva	18616	2234	20850	601340	74865	676205	13.35MB	3.12MB	16.48MB	fineweb-2, mala
bxr_Cyrl	18206	1647	19853	9061656	786878	9848534	71.37MB	18.02MB	89.39MB	fineweb-2, mala, New CC
ava_Cyrl	18167	1554	19721	14550948	1228411	15779359	107.88MB	14.21MB	122.09MB	fineweb-2, mala, New CC
und_Guru	18064	3007	21071	535109	89076	624185	21.62MB	55.12MB	76.74MB	fineweb-2
und_Cham	17917	3604	21521	762241	153324	915565	20.14MB	38.06MB	58.20MB	fineweb-2
und_Nbat	17611	3191	20802	280135	50758	330893	18.03MB	15.23MB	33.26MB	fineweb-2
brh_Arab	17461	1987	19448	6795547	773309	7568856	55.61MB	15.52MB	71.13MB	fineweb-2
und_Nand	17389	3359	20748	307115	59324	366439	16.94MB	18.31MB	35.25MB	fineweb-2
bho_Deva	17315	2180	19495	6646075	830554	7476629	91.40MB	15.63MB	107.03MB	fineweb-2, mala
ctd_Latn	17146	1544	18690	14833923	1341649	16175572	81.02MB	15.27MB	96.30MB	fineweb-2, mala
und_Osma	16975	2590	19565	495544	75608	571152	18.27MB	14.41MB	32.69MB	fineweb-2
aym_Latn	16583	1862	18445	1794497	201492	1995989	16.32MB	7.86MB	24.18MB	mala
hus_Latn	16480	1289	17769	1550072	155334	1705406	11.68MB	4.61MB	16.29MB	fineweb-2, mala
mfe_Latn	16480	4048	20528	12277328	2993026	15270354	52.61MB	59.40MB	112.01MB	fineweb-2, mala
gom_Deva	16447	1403	17850	6198275	488821	6687096	70.16MB	13.43MB	83.59MB	fineweb-2, mala, New CC
ady_Cyrl	16359	1820	18179	10748863	1168169	11917032	68.27MB	11.84MB	80.11MB	fineweb-2, mala
nbl_Latn	16112	1903	18015	838256	96449	934705	11.61MB	2.86MB	14.47MB	fineweb-2, mala
bat_Latn	15407	1524	16931	520105	51446	571551	9.15MB	2.32MB	11.47MB	mala
gug_Latn	15148	2562	17710	4578245	605145	5183390	38.46MB	10.19MB	48.65MB	fineweb-2, mala
ctu_Latn	14907	1075	15982	869126	61725	930851	7.86MB	2.49MB	10.35MB	fineweb-2, mala
und_Sind	14810	4238	19048	315612	90315	405927	20.18MB	17.84MB	38.02MB	fineweb-2
hne_Deva	14754	1465	16219	5962132	551615	6513747	71.79MB	13.03MB	84.82MB	fineweb-2, mala
guj_Deva	14742	2102	16844	1223055	174390	1397445	8.71MB	15.10MB	23.81MB	mala
srn_Latn	14688	1513	16201	9591478	933951	10525429	30.79MB	10.57MB	41.36MB	fineweb-2, mala
und_Sogd	14515	2728	17243	307504	57793	365297	13.99MB	9.28MB	23.27MB	fineweb-2
ton_Latn	14442	1278	15720	15103774	1335385	16439159	52.84MB	10.09MB	62.93MB	fineweb-2, mala
san_Latn	14408	1363	15771	19910532	2248454	22158986	35.94MB	30.47MB	66.40MB	fineweb-2, mala
aoj_Latn	13950	1700	15650	978003	121818	1099821	11.75MB	1.24MB	12.99MB	fineweb-2, mala
ltg_Latn	13945	831	14776	6977241	399268	7376509	48.38MB	5.17MB	53.55MB	fineweb-2, mala
lez_Cyrl	13694	1523	15217	6271912	679498	6951410	50.57MB	13.03MB	63.60MB	fineweb-2, mala, New CC
xav_Latn	13685	1355	15040	1182331	132463	1314794	10.89MB	1.91MB	12.80MB	fineweb-2, mala
luo_Latn	13435	1389	14824	1167915	129822	1297737	10.91MB	1.78MB	12.69MB	fineweb-2, mala
jam_Latn	13418	2184	15602	4585080	786365	5371445	26.25MB	15.70MB	41.95MB	fineweb-2, mala
bug_Latn	13294	2364	15658	457683	78412	536095	6.82MB	1.49MB	8.30MB	fineweb-2, mala
mrj_Cyrl	13282	1076	14358	6260548	530718	6791266	43.19MB	11.77MB	54.97MB	fineweb-2, mala, New CC
kos_Latn	13231	1029	14260	2125205	249061	2374266	12.98MB	5.13MB	18.11MB	fineweb-2, mala
und_Pauc	13226	4277	17503	1884578	609431	2494009	13.02MB	31.50MB	44.51MB	fineweb-2
uig_Cyrl	13089	1314	14403	17895565	1796529	19692094	78.38MB	15.30MB	93.68MB	fineweb-2
bih_Deva	12955	1570	14525	610769	74018	684787	10.24MB	3.37MB	13.60MB	mala
kbp_Latn	12917	1030	13947	5130789	412563	5543352	23.48MB	7.10MB	30.58MB	fineweb-2, mala
alt_Cyrl	12874	947	13821	7266148	515897	7782045	46.66MB	13.43MB	60.09MB	fineweb-2, mala
cfm_Latn	12560	963	13523	6839916	537373	7377289	35.12MB	10.53MB	45.65MB	fineweb-2, mala
und_Sylo	12417	2878	15295	922705	213863	1136568	21.71MB	21.20MB	42.91MB	fineweb-2
avk_Latn	12268	2019	14287	2064997	339846	2404843	11.57MB	4.10MB	15.67MB	fineweb-2, mala
lad_Latn	11615	1357	12972	4089396	495441	4584837	27.23MB	6.02MB	33.25MB	fineweb-2, mala
goh_Latn	11236	1748	12984	12674662	1971814	14646476	73.62MB	40.63MB	114.25MB	fineweb-2
cor_Latn	11233	1100	12333	4362762	407095	4769857	20.14MB	4.66MB	24.80MB	fineweb-2, mala, New CC
quc_Latn	11178	991	12169	2123624	191742	2315366	12.23MB	5.98MB	18.20MB	fineweb-2, mala
und_Goth	10949	1171	12120	108649	11620	120269	9.68MB	3.09MB	12.77MB	fineweb-2
kpv_Cyrl	10379	1181	11560	5296164	518302	5814466	52.44MB	24.08MB	76.52MB	fineweb-2, mala
aka_Latn	10332	951	11283	4341922	399648	4741570	22.37MB	7.51MB	29.87MB	mala
und_Rjng	10296	2356	12652	595509	136268	731777	9.00MB	14.33MB	23.32MB	fineweb-2
und_Chrs	10239	1260	11499	45985	5658	51643	7.84MB	5.20MB	13.04MB	fineweb-2
rmn_Latn	10203	710	10913	5087312	354012	5441324	31.16MB	11.69MB	42.85MB	fineweb-2
hak_Latn	10124	821	10945	1232294	101429	1333723	8.54MB	2.03MB	10.57MB	fineweb-2, mala
rcf_Latn	9696	762	10458	2900501	223229	3123730	20.20MB	2.71MB	22.91MB	fineweb-2, mala
gym_Latn	9590	752	10342	1867978	149232	2017210	10.17MB	6.22MB	16.38MB	fineweb-2, mala
sag_Latn	9583	929	10512	4933566	478272	5411838	15.58MB	4.12MB	19.70MB	mala
pls_Latn	9497	1097	10594	366580	45809	412389	5.04MB	658.19KB	5.68MB	fineweb-2, mala
som_Arab	9235	1126	10361	1404768	171279	1576047	6.83MB	11.35MB	18.18MB	mala
kik_Latn	9164	1032	10196	1263733	141916	1405649	10.17MB	1.79MB	11.96MB	fineweb-2, mala
arn_Latn	9145	640	9785	1390392	113503	1503895	9.10MB	4.61MB	13.70MB	fineweb-2, mala
pag_Latn	9121	794	9915	3469224	300380	3769604	13.11MB	4.66MB	17.77MB	fineweb-2, mala
und_Phlp	9083	2029	11112	31623	7064	38687	7.96MB	5.35MB	13.31MB	fineweb-2
tca_Latn	9010	568	9578	615647	53510	669157	6.83MB	1.42MB	8.25MB	fineweb-2, mala
ngu_Latn	8913	760	9673	1747904	155583	1903487	12.16MB	5.99MB	18.15MB	fineweb-2, mala
dsb_Latn	8851	797	9648	3236141	299732	3535873	17.57MB	3.82MB	21.39MB	fineweb-2, mala, New CC, New CC
mni_Mtei	8768	695	9463	624306	49755	674061	9.07MB	2.98MB	12.05MB	fineweb-2, mala
und_Mand	8727	1486	10213	82871	14111	96982	8.65MB	5.00MB	13.64MB	fineweb-2
min_Arab	8713	1780	10493	1772827	365826	2138653	22.03MB	10.09MB	32.12MB	fineweb-2, New CC
und_Tglg	8582	1883	10465	638746	140149	778895	10.70MB	10.39MB	21.09MB	fineweb-2
und_Shaw	8406	1283	9689	915432	139721	1055153	13.02MB	12.04MB	25.06MB	fineweb-2
olo_Latn	8359	687	9046	3212952	237567	3450519	16.16MB	4.61MB	20.78MB	fineweb-2, mala
fro_Latn	8283	757	9040	18394896	1681146	20076042	104.91MB	25.25MB	130.16MB	fineweb-2
kac_Latn	8238	634	8872	6593345	522580	7115925	33.64MB	5.62MB	39.27MB	fineweb-2, mala
pdc_Latn	8230	1130	9360	8095985	1188953	9284938	17.12MB	16.66MB	33.78MB	fineweb-2, mala
fit_Latn	8205	824	9029	2628515	263972	2892487	20.93MB	7.83MB	28.76MB	fineweb-2
crh_Cyrl	8166	770	8936	8087653	821898	8909551	26.20MB	11.85MB	38.05MB	fineweb-2, mala
mps_Latn	8131	1026	9157	2455357	280254	2735611	11.44MB	6.34MB	17.78MB	fineweb-2, mala
toj_Latn	8037	579	8616	948774	84375	1033149	6.00MB	2.81MB	8.81MB	fineweb-2, mala
guc_Latn	8021	520	8541	1313583	82949	1396532	6.16MB	2.68MB	8.84MB	fineweb-2, mala
srm_Latn	8017	612	8629	793878	60602	854480	4.24MB	2.28MB	6.52MB	mala
cab_Latn	7983	712	8695	1051226	101324	1152550	7.15MB	4.73MB	11.88MB	fineweb-2, mala
mdf_Cyrl	7968	762	8730	4632836	431482	5064318	35.73MB	5.39MB	41.12MB	fineweb-2, mala
naq_Latn	7914	1465	9379	821139	170280	991419	9.06MB	1.69MB	10.75MB	fineweb-2, mala
mad_Latn	7778	925	8703	980475	116603	1097078	4.83MB	2.81MB	7.64MB	mala
smn_Latn	7747	749	8496	2370025	225100	2595125	14.34MB	3.23MB	17.57MB	fineweb-2, mala
bis_Latn	7625	567	8192	3981683	279052	4260735	18.01MB	3.53MB	21.55MB	fineweb-2, mala
bzj_Latn	7621	557	8178	798646	60223	858869	4.43MB	2.54MB	6.97MB	fineweb-2, mala
cuk_Latn	7619	572	8191	851012	95848	946860	8.81MB	2.55MB	11.36MB	fineweb-2, mala
zai_Latn	7610	992	8602	548829	66545	615374	5.76MB	900.59KB	6.64MB	fineweb-2, mala
djk_Latn	7551	503	8054	713143	48228	761371	3.74MB	2.22MB	5.97MB	fineweb-2, mala
ote_Latn	7542	939	8481	527319	61940	589259	5.28MB	829.96KB	6.09MB	fineweb-2, mala
und_Hatr	7438	1634	9072	371481	81607	453088	9.68MB	12.90MB	22.58MB	fineweb-2
cdo_Latn	7366	1527	8893	238395	49420	287815	4.16MB	1.61MB	5.77MB	mala
tuk_Arab	7353	940	8293	2019038	235097	2254135	10.38MB	4.76MB	15.14MB	fineweb-2, New CC
nch_Latn	7265	1201	8466	753073	107083	860156	7.71MB	1.71MB	9.42MB	fineweb-2, mala
prs_Arab	7203	1125	8328	188232	29399	217631	3.73MB	734.77KB	4.45MB	mala
shp_Latn	7193	727	7920	221126	24662	245788	3.17MB	1.03MB	4.20MB	fineweb-2, mala
ncj_Latn	7093	942	8035	341341	51207	392548	4.72MB	1.01MB	5.73MB	fineweb-2, mala
mco_Latn	7064	858	7922	348796	40298	389094	4.83MB	654.04KB	5.47MB	fineweb-2, mala
und_Bugi	7013	1318	8331	94099	17684	111783	6.55MB	5.89MB	12.44MB	fineweb-2
seh_Latn	6989	598	7587	1001137	91194	1092331	7.18MB	3.49MB	10.67MB	fineweb-2, mala
qvi_Latn	6969	519	7488	414611	30681	445292	4.40MB	2.12MB	6.51MB	fineweb-2, mala
ike_Cans	6917	438	7355	1805712	114341	1920053	43.23MB	7.66MB	50.89MB	fineweb-2
map_Latn	6873	1290	8163	389852	73171	463023	3.44MB	3.01MB	6.45MB	mala
kum_Cyrl	6786	436	7222	3516299	226915	3743214	51.27MB	5.13MB	56.40MB	fineweb-2, mala
ang_Latn	6755	857	7612	18583013	2895373	21478386	12.75MB	45.59MB	58.33MB	fineweb-2, mala
awa_Deva	6720	652	7372	1833271	186769	2020040	21.99MB	5.59MB	27.58MB	fineweb-2, mala
ikk_Latn	6619	940	7559	208890	30638	239528	3.35MB	540.94KB	3.88MB	fineweb-2, mala
und_Tagb	6579	1143	7722	30924	5372	36296	5.57MB	2.22MB	7.79MB	fineweb-2
top_Latn	6569	841	7410	145343	18607	163950	3.34MB	463.04KB	3.79MB	mala
yom_Latn	6507	936	7443	261721	33437	295158	3.75MB	580.62KB	4.32MB	fineweb-2, mala
tly_Latn	6482	694	7176	323019	34584	357603	3.37MB	1.00MB	4.37MB	mala
mos_Latn	6368	808	7176	1375570	163352	1538922	10.36MB	882.80KB	11.22MB	fineweb-2, mala
ayr_Latn	6289	747	7036	1926378	228813	2155191	18.33MB	5.48MB	23.81MB	fineweb-2
mau_Latn	6237	1107	7344	235248	28496	263744	4.30MB	731.53KB	5.02MB	fineweb-2, mala
und_Olck	6177	1754	7931	371764	105564	477328	6.50MB	23.56MB	30.06MB	fineweb-2
yap_Latn	6108	689	6797	1371262	231832	1603094	7.88MB	2.63MB	10.51MB	fineweb-2, mala
und_Prti	6054	1093	7147	225933	40790	266723	6.97MB	4.36MB	11.33MB	fineweb-2
tdt_Latn	6025	863	6888	182143	26089	208232	2.75MB	440.78KB	3.18MB	mala
tah_Latn	5808	600	6408	6515410	711005	7226415	23.34MB	3.74MB	27.08MB	fineweb-2, mala
szy_Latn	5806	478	6284	3810359	317248	4127607	19.91MB	2.37MB	22.28MB	fineweb-2, mala
xal_Cyrl	5763	629	6392	2670010	273003	2943013	13.02MB	4.40MB	17.43MB	fineweb-2, mala, New CC
stq_Latn	5577	630	6207	2286526	258139	2544665	10.40MB	3.01MB	13.41MB	fineweb-2, mala
tlh_Latn	5528	671	6199	1755152	217384	1972536	14.09MB	2.87MB	16.96MB	fineweb-2, mala
evn_Cyrl	5486	1173	6659	220824	47215	268039	4.69MB	1.42MB	6.10MB	mala
skr_Arab	5396	561	5957	2872404	297287	3169691	18.47MB	7.85MB	26.32MB	fineweb-2, mala
nov_Latn	5378	617	5995	2147635	259618	2407253	9.54MB	12.01MB	21.55MB	fineweb-2, mala
quw_Latn	5343	930	6273	86757	15101	101858	2.34MB	434.49KB	2.77MB	mala
yua_Latn	5313	608	5921	3110895	357801	3468696	22.59MB	4.31MB	26.90MB	fineweb-2, mala
fiu_Latn	5276	400	5676	423848	32134	455982	4.48MB	1.32MB	5.81MB	mala
otq_Latn	5272	876	6148	3409366	579455	3988821	22.16MB	5.27MB	27.43MB	fineweb-2, mala
und_Narb	5224	835	6059	56094	8966	65060	5.73MB	6.79MB	12.53MB	fineweb-2
non_Latn	5208	388	5596	8552232	637147	9189379	37.62MB	20.99MB	58.61MB	fineweb-2
raw_Latn	5184	689	5873	5264857	699746	5964603	46.45MB	11.15MB	57.61MB	fineweb-2
bbc_Latn	5114	505	5619	5276633	516461	5793094	29.69MB	8.88MB	38.57MB	fineweb-2, mala
ber_Latn	5014	439	5453	3770704	330143	4100847	21.95MB	7.03MB	28.97MB	mala
und_Sarb	4990	874	5864	170459	29855	200314	6.61MB	15.21MB	21.81MB	fineweb-2
kua_Latn	4850	379	5229	2020563	158381	2178944	14.98MB	2.40MB	17.38MB	fineweb-2, mala
und_Ugar	4848	653	5501	133049	17921	150970	3.84MB	2.36MB	6.20MB	fineweb-2
enm_Latn	4840	800	5640	5063220	836895	5900115	35.12MB	6.72MB	41.84MB	fineweb-2
bua_Cyrl	4821	433	5254	2389868	214646	2604514	34.42MB	4.82MB	39.24MB	mala
kea_Latn	4788	645	5433	1991544	264653	2256197	12.53MB	4.14MB	16.67MB	fineweb-2, mala
uig_Latn	4713	615	5328	389468	50821	440289	2.33MB	1.13MB	3.47MB	mala
zza_Latn	4632	509	5141	3034848	333492	3368340	19.74MB	7.38MB	27.12MB	mala
mnw_Mymr	4625	400	5025	4338087	376128	4714215	38.69MB	19.62MB	58.31MB	fineweb-2, mala
und_Lydi	4591	1028	5619	28081628	6287936	34369564	73.64MB	67.70MB	141.34MB	fineweb-2
csy_Latn	4554	583	5137	316700	31989	348689	3.05MB	333.92KB	3.38MB	fineweb-2, mala
dtp_Latn	4454	563	5017	867974	103741	971715	8.25MB	1.58MB	9.83MB	fineweb-2, mala
azj_Cyrl	4407	392	4799	2270043	201918	2471961	31.26MB	5.12MB	36.38MB	fineweb-2
nog_Cyrl	4319	408	4727	824511	83300	907811	11.41MB	3.42MB	14.83MB	fineweb-2, mala
nah_Latn	4317	632	4949	278869	40741	319610	2.44MB	1.12MB	3.56MB	mala, New CC
meo_Latn	4209	371	4580	1068811	94209	1163020	7.51MB	1.02MB	8.53MB	mala
yao_Latn	4181	636	4817	196376	24488	220864	2.84MB	387.73KB	3.22MB	fineweb-2, mala
chm_Cyrl	4167	337	4504	2537674	205230	2742904	30.63MB	6.84MB	37.47MB	mala
kas_Arab	4135	316	4451	636364	40384	676748	5.42MB	1.19MB	6.61MB	fineweb-2, mala
mbf_Latn	4128	381	4509	2485436	229397	2714833	26.41MB	1.85MB	28.26MB	fineweb-2
abt_Latn	4090	492	4582	1834060	311432	2145492	13.49MB	3.41MB	16.89MB	fineweb-2, mala
tcy_Knda	4031	325	4356	2991685	241217	3232902	27.24MB	7.86MB	35.10MB	fineweb-2, mala
arb_Latn	3940	545	4485	1613407	223174	1836581	11.85MB	4.67MB	16.52MB	fineweb-2
tam_Latn	3925	455	4380	9347463	1083591	10431054	19.21MB	9.57MB	28.78MB	fineweb-2
ame_Latn	3914	652	4566	106152	18461	124613	2.16MB	440.61KB	2.59MB	fineweb-2, mala
crs_Latn	3842	515	4357	15216161	2224691	17440852	87.61MB	4.84MB	92.45MB	fineweb-2, mala
lbe_Cyrl	3823	397	4220	3368768	351114	3719882	21.42MB	3.39MB	24.80MB	fineweb-2, mala
zom_Latn	3735	233	3968	1737655	108399	1846054	11.27MB	1.86MB	13.13MB	fineweb-2
hau_Arab	3729	1041	4770	103254	28824	132078	1.99MB	918.26KB	2.89MB	mala
rom_Latn	3686	371	4057	2205605	221996	2427601	12.24MB	3.75MB	15.99MB	mala
cni_Latn	3574	586	4160	145656	22613	168269	2.79MB	420.00KB	3.20MB	fineweb-2, mala
efi_Latn	3484	194	3678	1857948	103456	1961404	14.54MB	1.01MB	15.56MB	fineweb-2
ben_Latn	3418	645	4063	3780581	728682	4509263	8.82MB	5.34MB	14.17MB	fineweb-2, mala
blk_Mymr	3405	296	3701	2459803	223904	2683707	26.96MB	5.28MB	32.24MB	fineweb-2, mala
koi_Cyrl	3276	275	3551	653112	54824	707936	4.42MB	1.66MB	6.07MB	mala
ada_Latn	3254	413	3667	4664956	602894	5267850	25.36MB	4.40MB	29.76MB	fineweb-2, mala
und_Buhd	3158	448	3606	7767	1101	8868	2.60MB	609.26KB	3.20MB	fineweb-2
ach_Latn	3157	239	3396	1691268	146225	1837493	10.32MB	905.77KB	11.21MB	fineweb-2, mala
trv_Latn	3079	319	3398	1561399	161086	1722485	8.67MB	2.65MB	11.32MB	fineweb-2, mala
mgh_Latn	3062	267	3329	1014664	88302	1102966	7.90MB	1.68MB	9.58MB	fineweb-2, mala
bbj_Latn	3039	383	3422	102944	19687	122631	1.27MB	444.28KB	1.70MB	fineweb-2, mala
krl_Latn	3008	239	3247	1378109	109497	1487606	12.20MB	2.46MB	14.66MB	fineweb-2
iso_Latn	2945	301	3246	3370964	345115	3716079	20.30MB	2.51MB	22.81MB	fineweb-2, mala
gcr_Latn	2904	356	3260	537566	65667	603233	3.63MB	729.90KB	4.34MB	fineweb-2, mala
kas_Latn	2887	426	3313	3665093	540813	4205906	7.55MB	5.65MB	13.20MB	fineweb-2
und_Perm	2874	630	3504	19169	4202	23371	2.46MB	1.30MB	3.76MB	fineweb-2
atj_Latn	2822	244	3066	462035	56572	518607	3.51MB	1.52MB	5.03MB	fineweb-2, mala
mni_Beng	2784	227	3011	10018767	825720	10844487	35.48MB	5.70MB	41.18MB	fineweb-2, mala
gur_Latn	2767	248	3015	1397879	119388	1517267	5.94MB	807.13KB	6.73MB	fineweb-2, mala
sma_Latn	2764	227	2991	831229	68266	899495	7.82MB	2.26MB	10.08MB	fineweb-2
chu_Cyrl	2738	300	3038	1405284	153290	1558574	6.63MB	1.42MB	8.05MB	fineweb-2, mala
brx_Deva	2720	157	2877	567743	34520	602263	12.19MB	1.43MB	13.62MB	fineweb-2, mala
iku_Cans	2718	286	3004	811697	85410	897107	17.99MB	3.90MB	21.89MB	mala
cmo_Latn	2711	335	3046	1033194	127672	1160866	6.69MB	1.21MB	7.89MB	fineweb-2
btx_Latn	2705	195	2900	1720528	124253	1844781	11.85MB	1.48MB	13.33MB	fineweb-2, mala
dty_Deva	2689	247	2936	405623	37431	443054	4.96MB	1.73MB	6.69MB	fineweb-2, mala
gos_Latn	2687	287	2974	1051585	112320	1163905	7.65MB	946.10KB	8.57MB	fineweb-2
nzi_Latn	2681	244	2925	2242447	207818	2450265	14.86MB	1.60MB	16.46MB	fineweb-2, mala
bts_Latn	2660	211	2871	1311786	102596	1414382	9.21MB	1.35MB	10.56MB	fineweb-2, mala
quy_Latn	2655	162	2817	660458	41768	702226	7.38MB	1.09MB	8.46MB	fineweb-2, mala
lki_Arab	2646	451	3097	501033	85399	586432	5.73MB	1.66MB	7.39MB	fineweb-2
guw_Latn	2644	201	2845	1170447	87242	1257689	7.42MB	754.77KB	8.15MB	fineweb-2, mala
pck_Latn	2636	191	2827	3951727	284081	4235808	21.62MB	2.53MB	24.15MB	fineweb-2, mala
ckt_Latn	2632	224	2856	111880	9521	121401	7.67MB	1003.49KB	8.65MB	mala
lrc_Arab	2599	389	2988	577704	82561	660265	6.01MB	1.86MB	7.87MB	fineweb-2, mala
nyu_Latn	2592	275	2867	537673	62860	600533	4.43MB	1.45MB	5.89MB	fineweb-2, mala
ksw_Mymr	2571	224	2795	504327	39895	544222	21.89MB	3.79MB	25.68MB	fineweb-2, mala
bal_Arab	2562	188	2750	1509589	110773	1620362	11.86MB	2.50MB	14.36MB	mala
gcf_Latn	2557	254	2811	976651	97015	1073666	6.48MB	1.42MB	7.90MB	fineweb-2
nia_Latn	2543	238	2781	1647545	147561	1795106	7.18MB	1.55MB	8.73MB	fineweb-2, mala
dyu_Latn	2494	195	2689	1214350	97509	1311859	7.22MB	928.15KB	8.13MB	fineweb-2, mala
nhe_Latn	2489	258	2747	1487476	146415	1633891	12.48MB	1.50MB	13.98MB	fineweb-2, mala
loz_Latn	2463	136	2599	1719591	94951	1814542	10.81MB	1.14MB	11.94MB	fineweb-2
tay_Latn	2369	246	2615	549829	57094	606923	2.55MB	1.00MB	3.55MB	mala
hwc_Latn	2352	363	2715	1586723	244889	1831612	7.11MB	4.80MB	11.90MB	fineweb-2
gaa_Latn	2346	264	2610	1341955	151012	1492967	9.28MB	1.13MB	10.41MB	fineweb-2
ami_Latn	2336	308	2644	1422955	189398	1612353	5.95MB	2.66MB	8.61MB	fineweb-2, mala
mup_Deva	2324	371	2695	655944	104714	760658	6.60MB	2.83MB	9.43MB	fineweb-2
snd_Latn	2242	257	2499	1195739	137067	1332806	4.40MB	1.08MB	5.48MB	fineweb-2
pnt_Grek	2237	302	2539	2064646	289840	2354486	7.34MB	2.58MB	9.93MB	fineweb-2, mala
abq_Cyrl	2227	351	2578	311701	29945	341646	5.22MB	979.30KB	6.18MB	fineweb-2, mala
ium_Latn	2227	253	2480	2178910	251279	2430189	10.51MB	2.56MB	13.08MB	fineweb-2, mala
gil_Latn	2224	146	2370	1343484	88196	1431680	7.58MB	1.23MB	8.82MB	fineweb-2
shi_Latn	2183	255	2438	5868473	773346	6641819	3.52MB	13.46MB	16.98MB	fineweb-2, mala
cmr_Latn	2181	257	2438	2812904	331460	3144364	22.05MB	2.44MB	24.49MB	fineweb-2
tzm_Tfng	2160	216	2376	643153	64315	707468	9.74MB	2.00MB	11.73MB	fineweb-2
sms_Latn	2118	150	2268	591273	41874	633147	7.30MB	1.23MB	8.53MB	fineweb-2
kon_Latn	2055	229	2284	411866	45896	457762	2.11MB	1.56MB	3.66MB	mala
tcz_Latn	2051	109	2160	2790663	148309	2938972	16.55MB	2.00MB	18.54MB	fineweb-2
tab_Cyrl	2027	149	2176	973469	72381	1045850	14.58MB	1.74MB	16.32MB	fineweb-2, mala
tzh_Latn	2013	220	2233	1974530	221409	2195939	11.41MB	2.94MB	14.36MB	fineweb-2, mala
ape_Latn	2012	235	2247	1161350	172387	1333737	8.24MB	1.51MB	9.75MB	fineweb-2, mala
abs_Latn	2008	441	2449	1125701	247228	1372929	5.62MB	4.84MB	10.46MB	fineweb-2
acd_Latn	1987	195	2182	444375	43610	487985	2.92MB	1.17MB	4.09MB	fineweb-2
aaz_Latn	1982	115	2097	460750	26733	487483	3.32MB	818.05KB	4.12MB	fineweb-2
bci_Latn	1980	211	2191	2126232	229832	2356064	11.78MB	1.55MB	13.33MB	fineweb-2, mala
fuv_Latn	1979	190	2169	660823	63444	724267	5.28MB	1.04MB	6.32MB	fineweb-2
tvl_Latn	1910	152	2062	2562279	214970	2777249	12.56MB	1.68MB	14.24MB	fineweb-2, mala
pli_Deva	1893	113	2006	26175	1562	27737	1.13MB	106.58KB	1.24MB	mala
ewo_Latn	1876	465	2341	530925	131567	662492	3.20MB	2.37MB	5.57MB	fineweb-2, mala
swc_Latn	1863	298	2161	309030	49431	358461	3.31MB	754.52KB	4.05MB	fineweb-2
tly_Arab	1858	238	2096	260470	33364	293834	2.64MB	528.29KB	3.16MB	mala
sgc_Latn	1812	328	2140	442321	80066	522387	2.97MB	2.20MB	5.17MB	fineweb-2
zha_Latn	1795	238	2033	102886	13641	116527	1.04MB	579.94KB	1.60MB	mala
quz_Latn	1790	194	1984	722225	77743	799968	6.79MB	1.67MB	8.46MB	fineweb-2, mala
cha_Latn	1774	177	1951	1782095	175583	1957678	5.13MB	2.03MB	7.16MB	fineweb-2, mala
bru_Latn	1768	221	1989	1867320	237022	2104342	9.08MB	4.43MB	13.51MB	fineweb-2, mala
aeu_Latn	1761	166	1927	368223	36600	404823	2.58MB	987.33KB	3.55MB	fineweb-2, mala
pib_Latn	1740	273	2013	45606	9395	55001	956.10KB	189.03KB	1.12MB	fineweb-2, mala
nqo_Nkoo	1736	147	1883	1297276	109299	1406575	9.62MB	2.08MB	11.69MB	fineweb-2, mala
kng_Latn	1726	104	1830	1747424	105290	1852714	9.68MB	1.94MB	11.62MB	fineweb-2
rmc_Latn	1723	141	1864	1235721	101134	1336855	7.74MB	1.26MB	9.00MB	fineweb-2, mala
smj_Latn	1701	142	1843	425184	35494	460678	4.11MB	1019.96KB	5.11MB	fineweb-2
bjn_Arab	1688	222	1910	3364741	442519	3807260	7.40MB	6.13MB	13.54MB	fineweb-2
mer_Latn	1684	195	1879	164322	19027	183349	2.41MB	352.69KB	2.75MB	fineweb-2
hla_Latn	1674	123	1797	353902	25989	379891	2.41MB	582.62KB	2.98MB	fineweb-2, mala
und_Elym	1662	496	2158	61253	18280	79533	1.79MB	7.17MB	8.96MB	fineweb-2
msi_Latn	1657	188	1845	382369	43382	425751	2.71MB	473.48KB	3.17MB	mala
zap_Latn	1634	169	1803	1602098	165700	1767798	10.66MB	1.62MB	12.28MB	mala
mbt_Latn	1604	149	1753	1635457	157142	1792599	7.47MB	3.58MB	11.05MB	fineweb-2, mala
doi_Deva	1603	206	1809	513122	66114	579236	6.40MB	2.18MB	8.58MB	fineweb-2, mala
aau_Latn	1593	96	1689	435186	26225	461411	3.11MB	598.05KB	3.70MB	fineweb-2
dhv_Latn	1589	232	1821	669530	97754	767284	3.67MB	2.21MB	5.88MB	fineweb-2
cac_Latn	1584	194	1778	854611	106419	961030	5.70MB	914.70KB	6.59MB	fineweb-2, mala
acf_Latn	1576	289	1865	893132	142495	1035627	5.14MB	1.50MB	6.64MB	fineweb-2, mala
nak_Latn	1558	114	1672	376881	27576	404457	2.55MB	684.81KB	3.22MB	fineweb-2
taq_Latn	1557	172	1729	760343	83994	844337	4.98MB	2.70MB	7.67MB	fineweb-2
fuf_Latn	1553	92	1645	721390	42735	764125	5.06MB	837.21KB	5.88MB	fineweb-2
mjw_Latn	1544	104	1648	359172	24192	383364	3.34MB	334.22KB	3.67MB	fineweb-2
pdt_Latn	1542	230	1772	673450	100449	773899	4.04MB	2.37MB	6.41MB	fineweb-2
bgp_Latn	1516	184	1700	1238088	150269	1388357	6.14MB	1.79MB	7.93MB	mala
bba_Latn	1509	123	1632	584646	47655	632301	4.03MB	298.12KB	4.33MB	fineweb-2
tyz_Latn	1509	185	1694	1690506	207252	1897758	11.45MB	1.64MB	13.09MB	mala
kmb_Latn	1495	175	1670	1343747	155030	1498777	8.23MB	1.10MB	9.32MB	fineweb-2, mala
ron_Cyrl	1447	157	1604	5122566	555800	5678366	9.63MB	7.14MB	16.77MB	fineweb-2
amu_Latn	1430	157	1587	941448	105928	1047376	8.57MB	1.12MB	9.69MB	fineweb-2, mala
cop_Copt	1430	129	1559	931697	84048	1015745	11.86MB	3.45MB	15.30MB	fineweb-2
emp_Latn	1394	135	1529	1054420	102650	1157070	9.18MB	1009.31KB	10.16MB	fineweb-2, mala
mah_Latn	1378	311	1689	1047098	210131	1257229	6.20MB	2.46MB	8.66MB	fineweb-2, mala
rar_Latn	1374	175	1549	749786	95496	845282	4.49MB	721.28KB	5.19MB	fineweb-2
nyn_Latn	1343	140	1483	403755	42089	445844	3.36MB	1.07MB	4.44MB	fineweb-2
arc_Syrc	1338	167	1505	47024	5869	52893	931.34KB	237.02KB	1.14MB	mala
lub_Latn	1336	102	1438	2075990	158496	2234486	11.96MB	3.33MB	15.29MB	mala
tzj_Latn	1322	98	1420	681051	55569	736620	4.58MB	576.44KB	5.15MB	fineweb-2, mala
alz_Latn	1293	278	1571	549645	101670	651315	3.90MB	1.15MB	5.05MB	fineweb-2, mala
lzh_Hani	1279	120	1399	755209	67205	822414	6.64MB	3.69MB	10.33MB	fineweb-2, mala
nnb_Latn	1253	85	1338	1227157	78584	1305741	10.21MB	738.70KB	10.93MB	fineweb-2, mala
fat_Latn	1250	153	1403	619588	75174	694762	3.08MB	586.85KB	3.65MB	fineweb-2, mala
aak_Latn	1247	106	1353	584798	49710	634508	6.29MB	886.18KB	7.16MB	fineweb-2
lhu_Latn	1205	130	1335	758117	84750	842867	4.21MB	981.16KB	5.17MB	fineweb-2, mala
maq_Latn	1189	82	1271	314400	21682	336082	2.15MB	628.14KB	2.76MB	fineweb-2
bjv_Latn	1179	137	1316	286749	33320	320069	1.93MB	556.83KB	2.47MB	fineweb-2
rut_Cyrl	1172	278	1450	34236	8121	42357	686.50KB	285.70KB	972.20KB	mala
bvr_Latn	1171	85	1256	223314	16209	239523	2.33MB	614.00KB	2.93MB	fineweb-2
bum_Latn	1162	132	1294	1036678	115175	1151853	6.01MB	645.64KB	6.64MB	fineweb-2, mala
khs_Latn	1143	83	1226	278153	20198	298351	2.56MB	468.56KB	3.01MB	fineweb-2
tok_Latn	1143	85	1228	306560	22797	329357	1.89MB	451.88KB	2.33MB	fineweb-2
kcg_Latn	1136	111	1247	172066	16666	188732	1.16MB	210.37KB	1.37MB	fineweb-2, mala
roa_Latn	1115	145	1260	175426	22813	198239	979.68KB	1.01MB	1.97MB	mala
hmr_Latn	1107	67	1174	1291931	78192	1370123	6.52MB	1.42MB	7.94MB	fineweb-2
yrk_Cyrl	1103	125	1228	294774	25423	320197	4.62MB	356.88KB	4.97MB	fineweb-2, mala
qub_Latn	1098	101	1199	582341	55090	637431	6.23MB	512.21KB	6.73MB	fineweb-2, mala
tuk_Cyrl	1098	125	1223	8257646	989927	9247573	22.74MB	7.12MB	29.86MB	fineweb-2, mala
jac_Latn	1089	86	1175	955275	75413	1030688	6.61MB	857.27KB	7.45MB	fineweb-2, mala
amp_Latn	1084	84	1168	2099409	163093	2262502	3.20MB	14.34MB	17.55MB	fineweb-2, mala
chr_Cher	1084	87	1171	567046	70003	637049	2.87MB	1.32MB	4.20MB	fineweb-2, mala
rup_Latn	1080	144	1224	1091025	145470	1236495	3.45MB	5.13MB	8.59MB	fineweb-2
sid_Latn	1076	98	1174	266680	24288	290968	2.85MB	387.28KB	3.23MB	fineweb-2
cav_Latn	1074	70	1144	248032	16166	264198	2.30MB	407.59KB	2.70MB	fineweb-2
fkv_Latn	1073	85	1158	439236	34795	474031	3.85MB	567.39KB	4.41MB	fineweb-2
qug_Latn	1067	100	1167	854879	80119	934998	8.17MB	827.39KB	8.97MB	fineweb-2
her_Latn	1044	97	1141	427956	39762	467718	3.47MB	537.25KB	3.99MB	fineweb-2
bem_Latn	1038	108	1146	532202	55529	587731	4.36MB	425.71KB	4.77MB	fineweb-2, mala
kwn_Latn	1030	80	1110	394744	30659	425403	3.19MB	574.29KB	3.76MB	fineweb-2
mas_Latn	1016	148	1164	430618	50363	480981	4.15MB	490.68KB	4.63MB	fineweb-2, mala
gpe_Latn	998	106	1104	268229	28489	296718	1.78MB	462.59KB	2.23MB	mala
ify_Latn	991	118	1109	681798	81250	763048	4.49MB	641.76KB	5.11MB	fineweb-2, mala
mal_Latn	978	174	1152	1512693	269129	1781822	4.05MB	1.32MB	5.37MB	fineweb-2
tiv_Latn	972	87	1059	1913949	171310	2085259	9.50MB	782.92KB	10.26MB	mala
agu_Latn	969	99	1068	417313	42635	459948	2.82MB	416.05KB	3.23MB	fineweb-2
bon_Latn	963	36	999	280194	10474	290668	2.30MB	219.18KB	2.51MB	fineweb-2
ppk_Latn	955	108	1063	711205	80429	791634	5.08MB	550.30KB	5.62MB	mala
asm_Latn	949	155	1104	1636744	267329	1904073	3.86MB	1.53MB	5.39MB	fineweb-2
zpa_Latn	942	146	1088	217011	33634	250645	2.05MB	472.62KB	2.51MB	fineweb-2
sus_Latn	939	66	1005	792968	56354	849322	3.89MB	1.24MB	5.13MB	fineweb-2, mala
ahk_Latn	938	100	1038	183241	20955	204196	754.18KB	910.11KB	1.63MB	fineweb-2, mala
pis_Latn	932	102	1034	237485	25115	262600	1.67MB	601.20KB	2.26MB	fineweb-2, mala
npi_Latn	918	134	1052	416623	60814	477437	2.91MB	1.57MB	4.47MB	fineweb-2
sja_Latn	906	92	998	630606	62312	692918	5.78MB	558.93KB	6.33MB	fineweb-2, mala
kas_Deva	896	96	992	979220	104916	1084136	3.93MB	1.04MB	4.97MB	fineweb-2
wrk_Latn	895	71	966	243213	19293	262506	2.14MB	597.70KB	2.72MB	fineweb-2
got_Goth	892	71	963	82648	8053	90701	1.38MB	383.44KB	1.75MB	fineweb-2, mala
hns_Latn	892	140	1032	314966	49434	364400	2.13MB	1.24MB	3.37MB	fineweb-2
mar_Latn	884	92	976	1398860	145582	1544442	2.27MB	2.23MB	4.50MB	fineweb-2
dik_Latn	876	93	969	415625	44124	459749	2.80MB	522.11KB	3.31MB	fineweb-2
jiv_Latn	876	60	936	500352	34305	534657	4.37MB	498.50KB	4.86MB	fineweb-2, mala
kri_Latn	872	98	970	940875	94092	1034967	4.98MB	759.49KB	5.72MB	fineweb-2, mala
cok_Latn	868	50	918	274722	15825	290547	2.77MB	407.85KB	3.17MB	fineweb-2
gui_Latn	868	82	950	533766	52374	586140	3.82MB	474.78KB	4.29MB	fineweb-2, mala
mak_Latn	866	82	948	609528	56569	666097	4.31MB	1.73MB	6.03MB	fineweb-2, mala
hak_Hani	861	140	1001	1703303	281910	1985213	3.70MB	2.36MB	6.06MB	fineweb-2, mala
bsn_Latn	858	64	922	249744	18628	268372	2.37MB	369.96KB	2.73MB	fineweb-2
hnj_Latn	848	58	906	677721	41785	719506	3.80MB	265.30KB	4.06MB	fineweb-2, mala
dov_Latn	846	73	919	862945	74462	937407	6.61MB	1.33MB	7.94MB	mala
teo_Latn	844	47	891	424816	23748	448564	3.30MB	378.12KB	3.67MB	fineweb-2, mala
kjb_Latn	840	93	933	63621	8461	72082	523.52KB	214.59KB	738.12KB	fineweb-2, mala
maz_Latn	834	94	928	283609	40790	324399	1.88MB	515.91KB	2.38MB	fineweb-2, mala
krj_Latn	828	67	895	613015	49603	662618	4.07MB	402.49KB	4.46MB	fineweb-2
dar_Cyrl	826	133	959	361614	54699	416313	5.96MB	1.26MB	7.22MB	fineweb-2, mala
aom_Latn	819	64	883	281780	22019	303799	2.44MB	427.06KB	2.86MB	fineweb-2
din_Latn	809	66	875	534997	43646	578643	2.98MB	799.05KB	3.76MB	mala
ffm_Latn	807	68	875	519366	43561	562927	3.58MB	561.33KB	4.13MB	fineweb-2, mala
rwo_Latn	796	116	912	854111	121747	975858	6.30MB	808.80KB	7.09MB	fineweb-2, mala
mag_Deva	794	116	910	676012	104599	780611	7.92MB	2.06MB	9.98MB	fineweb-2, mala
sny_Latn	782	56	838	247016	17689	264705	1.63MB	404.09KB	2.02MB	fineweb-2
gum_Latn	778	60	838	361423	27873	389296	3.30MB	271.11KB	3.57MB	fineweb-2
med_Latn	774	56	830	453646	32821	486467	2.91MB	284.88KB	3.19MB	fineweb-2
twu_Latn	765	67	832	744127	65990	810117	4.46MB	478.61KB	4.93MB	fineweb-2, mala
acn_Latn	743	78	821	1248548	131072	1379620	5.22MB	2.81MB	8.03MB	fineweb-2
pwn_Latn	741	99	840	303003	40560	343563	2.04MB	636.90KB	2.66MB	fineweb-2, mala
cbs_Latn	736	57	793	229564	17778	247342	2.10MB	191.89KB	2.29MB	fineweb-2
enq_Latn	735	153	888	637402	133420	770822	4.26MB	1.10MB	5.36MB	fineweb-2, mala
ubu_Latn	735	110	845	850643	127307	977950	5.79MB	1.02MB	6.81MB	mala
ibb_Latn	732	52	784	685874	48723	734597	4.78MB	394.71KB	5.17MB	mala
abx_Latn	721	67	788	211557	19659	231216	1.70MB	305.01KB	2.00MB	fineweb-2
zty_Latn	711	58	769	233645	19059	252704	1.86MB	278.16KB	2.13MB	fineweb-2
aby_Latn	707	52	759	224294	16496	240790	1.72MB	234.22KB	1.94MB	fineweb-2
rme_Latn	703	111	814	160204	25295	185499	1.36MB	493.04KB	1.84MB	fineweb-2
ndc_Latn	702	85	787	346963	44690	391653	2.98MB	430.60KB	3.40MB	fineweb-2, mala
cui_Latn	700	55	755	145083	11399	156482	1.33MB	331.91KB	1.66MB	fineweb-2
lua_Latn	699	50	749	353962	25319	379281	2.67MB	313.71KB	2.98MB	fineweb-2
yby_Latn	693	55	748	274390	21776	296166	2.29MB	266.09KB	2.55MB	fineweb-2
rad_Latn	691	73	764	523250	55278	578528	3.25MB	368.85KB	3.61MB	fineweb-2
agg_Latn	689	43	732	314150	19605	333755	3.26MB	265.44KB	3.52MB	fineweb-2
agd_Latn	685	53	738	150502	11644	162146	1.19MB	254.87KB	1.43MB	fineweb-2
nij_Latn	677	56	733	392022	35445	427467	2.80MB	305.79KB	3.10MB	fineweb-2, mala
pih_Latn	674	108	782	26541	4253	30794	369.45KB	108.10KB	477.55KB	mala
bno_Latn	670	76	746	186465	21151	207616	1.51MB	412.29KB	1.91MB	fineweb-2
yre_Latn	670	62	732	61767	5715	67482	655.21KB	190.57KB	845.78KB	fineweb-2
tsg_Latn	667	58	725	399546	35744	435290	2.67MB	392.11KB	3.05MB	fineweb-2, mala
adj_Latn	664	52	716	290932	22783	313715	1.78MB	226.56KB	2.00MB	fineweb-2
fip_Latn	664	65	729	813564	79641	893205	5.96MB	603.93KB	6.55MB	mala
agx_Cyrl	663	49	712	225921	16697	242618	3.12MB	506.64KB	3.61MB	fineweb-2
cnk_Latn	663	42	705	517243	32766	550009	2.86MB	478.41KB	3.32MB	fineweb-2
umb_Latn	660	49	709	219708	16311	236019	1.70MB	251.83KB	1.94MB	fineweb-2
mfq_Latn	659	57	716	341852	29568	371420	1.96MB	276.74KB	2.23MB	fineweb-2
qve_Latn	657	70	727	310712	33089	343801	3.34MB	400.51KB	3.73MB	fineweb-2, mala
nhw_Latn	654	50	704	382118	29213	411331	3.46MB	200.66KB	3.66MB	fineweb-2
niu_Latn	649	90	739	331301	45943	377244	2.01MB	345.74KB	2.34MB	fineweb-2
zne_Latn	647	112	759	611820	75607	687427	3.76MB	609.48KB	4.35MB	fineweb-2, mala
kwy_Latn	643	49	692	286134	21804	307938	2.11MB	236.97KB	2.35MB	fineweb-2
skg_Latn	634	90	724	198445	28170	226615	1.70MB	276.20KB	1.97MB	fineweb-2
maa_Latn	633	61	694	366555	35323	401878	3.37MB	313.70KB	3.68MB	fineweb-2
xsm_Latn	619	42	661	360391	24453	384844	2.20MB	177.36KB	2.37MB	fineweb-2
jra_Latn	618	67	685	570280	58819	629099	3.59MB	442.29KB	4.03MB	fineweb-2, mala
vap_Latn	614	46	660	280120	20986	301106	1.64MB	401.77KB	2.03MB	fineweb-2
inb_Latn	612	71	683	328467	38449	366916	3.44MB	422.88KB	3.85MB	fineweb-2, mala
pau_Latn	611	60	671	934401	91283	1025684	4.56MB	919.19KB	5.46MB	fineweb-2, mala
kmg_Latn	606	106	712	469538	80773	550311	3.05MB	1.28MB	4.33MB	fineweb-2, mala
alp_Latn	601	49	650	158006	12882	170888	1.16MB	208.66KB	1.37MB	fineweb-2
des_Latn	595	54	649	253980	23050	277030	2.32MB	185.84KB	2.50MB	fineweb-2
kmr_Cyrl	585	54	639	148849	13739	162588	1.98MB	430.81KB	2.40MB	fineweb-2
syr_Syrc	582	103	685	1592695	281868	1874563	15.71MB	8.38MB	24.10MB	mala
qwh_Latn	569	52	621	198192	18112	216304	2.20MB	286.49KB	2.48MB	fineweb-2
mkn_Latn	563	64	627	571272	65915	637187	3.45MB	419.12KB	3.86MB	fineweb-2, mala
gan_Hani	562	60	622	22057	2354	24411	898.64KB	394.15KB	1.26MB	mala
jvn_Latn	561	51	612	566157	51137	617294	3.45MB	1.04MB	4.49MB	fineweb-2, mala
klv_Latn	554	40	594	264082	19067	283149	2.10MB	192.18KB	2.29MB	fineweb-2
jbu_Latn	551	38	589	266313	18366	284679	1.52MB	159.81KB	1.68MB	fineweb-2
xon_Latn	551	41	592	294374	21904	316278	1.76MB	128.60KB	1.89MB	fineweb-2
bin_Latn	546	49	595	275329	24709	300038	1.93MB	175.70KB	2.10MB	fineweb-2
ptu_Latn	542	43	585	341754	27113	368867	2.78MB	147.84KB	2.92MB	fineweb-2
mmn_Latn	537	41	578	168631	12874	181505	1.22MB	183.16KB	1.40MB	fineweb-2
mpx_Latn	537	64	601	364490	43440	407930	2.49MB	226.44KB	2.71MB	fineweb-2
bib_Latn	535	74	609	232325	32134	264459	1.41MB	279.74KB	1.68MB	fineweb-2
mgr_Latn	535	33	568	210683	12995	223678	1.80MB	131.37KB	1.93MB	fineweb-2
bdh_Latn	531	57	588	133691	14351	148042	968.82KB	438.43KB	1.37MB	fineweb-2
knj_Latn	528	58	586	464101	50677	514778	2.55MB	306.33KB	2.85MB	fineweb-2, mala
syl_Latn	524	124	648	11421122	2702708	14123830	99.41MB	8.40MB	107.81MB	fineweb-2
dop_Latn	522	47	569	244913	22051	266964	1.96MB	126.71KB	2.08MB	fineweb-2
mwq_Latn	522	54	576	308284	31891	340175	1.78MB	210.48KB	1.98MB	fineweb-2
rmy_Cyrl	519	51	570	1490127	146706	1636833	2.46MB	1.38MB	3.84MB	fineweb-2, mala
kkc_Latn	518	28	546	220359	11911	232270	1.64MB	125.15KB	1.76MB	fineweb-2
sop_Latn	516	58	574	269725	30318	300043	2.24MB	196.05KB	2.43MB	fineweb-2
acu_Latn	513	31	544	196786	11891	208677	1.86MB	157.34KB	2.01MB	fineweb-2
tbc_Latn	512	42	554	289017	23708	312725	1.73MB	188.32KB	1.91MB	fineweb-2
cgc_Latn	511	62	573	250877	30026	280903	2.14MB	275.33KB	2.41MB	fineweb-2, mala
zpu_Latn	507	43	550	299051	25363	324414	2.03MB	363.98KB	2.39MB	fineweb-2
ata_Latn	506	37	543	327471	23945	351416	2.25MB	206.34KB	2.46MB	fineweb-2
ckt_Cyrl	506	98	604	80545	12540	93085	905.95KB	948.26KB	1.81MB	fineweb-2, mala
imo_Latn	500	29	529	220108	12766	232874	1.70MB	102.58KB	1.80MB	fineweb-2
mif_Latn	493	44	537	280591	25042	305633	1.76MB	200.91KB	1.95MB	fineweb-2
apr_Latn	490	34	524	187964	13042	201006	1.12MB	194.38KB	1.31MB	fineweb-2
kyq_Latn	489	61	550	307142	38314	345456	2.06MB	236.70KB	2.29MB	fineweb-2
zyp_Latn	488	37	525	334938	25394	360332	2.12MB	269.94KB	2.38MB	fineweb-2
gag_Cyrl	481	59	540	2581168	317841	2899009	8.05MB	1.17MB	9.22MB	fineweb-2, mala
mbi_Latn	478	52	530	127445	13864	141309	865.60KB	280.41KB	1.12MB	fineweb-2
kan_Latn	476	64	540	505560	67974	573534	1.40MB	535.90KB	1.92MB	fineweb-2
att_Latn	475	49	524	126084	13006	139090	922.85KB	256.21KB	1.15MB	fineweb-2
kqp_Latn	474	44	518	255812	23746	279558	1.57MB	141.99KB	1.71MB	fineweb-2
koo_Latn	472	45	517	177640	16936	194576	1.69MB	263.34KB	1.95MB	fineweb-2
tsz_Latn	471	60	531	133392	16992	150384	1.38MB	226.69KB	1.60MB	fineweb-2
urh_Latn	471	44	515	213563	19950	233513	1.58MB	193.43KB	1.77MB	fineweb-2
bbr_Latn	469	44	513	289329	27143	316472	1.98MB	214.09KB	2.19MB	fineweb-2
rmo_Latn	468	100	568	237230	50690	287920	1.62MB	342.10KB	1.96MB	fineweb-2
sus_Arab	465	67	532	320786	46220	367006	3.14MB	430.23KB	3.56MB	fineweb-2
bas_Latn	464	52	516	534370	56256	590626	2.68MB	510.85KB	3.18MB	fineweb-2, mala
msy_Latn	464	48	512	365717	37832	403549	2.77MB	222.88KB	2.99MB	fineweb-2
bus_Latn	463	55	518	289927	34885	324812	1.98MB	291.81KB	2.27MB	fineweb-2, mala
lee_Latn	461	33	494	258452	18500	276952	1.81MB	141.70KB	1.95MB	fineweb-2
mdy_Ethi	459	50	509	251945	27444	279389	3.05MB	321.40KB	3.37MB	fineweb-2
dig_Latn	455	29	484	276333	17630	293963	2.17MB	114.77KB	2.29MB	fineweb-2, mala
fuh_Latn	453	73	526	256371	41381	297752	1.77MB	475.96KB	2.24MB	fineweb-2, mala
mcu_Latn	453	39	492	238878	20565	259443	1.56MB	152.06KB	1.71MB	fineweb-2
nas_Latn	449	49	498	251755	27630	279385	2.73MB	177.00KB	2.91MB	fineweb-2, mala
nfr_Latn	449	35	484	168556	13139	181695	1.03MB	126.96KB	1.15MB	fineweb-2
izr_Latn	446	38	484	189756	16167	205923	1.13MB	110.31KB	1.23MB	fineweb-2
bvz_Latn	443	31	474	485999	34008	520007	2.81MB	214.81KB	3.01MB	fineweb-2
hto_Latn	442	41	483	226267	20988	247255	2.21MB	161.53KB	2.37MB	fineweb-2
mrj_Latn	441	46	487	91180	9510	100690	620.47KB	802.95KB	1.39MB	mala
ded_Latn	439	36	475	325477	26570	352047	2.59MB	181.41KB	2.77MB	fineweb-2, mala
ikt_Latn	439	32	471	183323	13362	196685	1.90MB	1.03MB	2.93MB	fineweb-2
mzw_Latn	438	45	483	184465	18951	203416	1.17MB	125.67KB	1.29MB	fineweb-2
qxh_Latn	438	44	482	200816	20247	221063	2.33MB	210.21KB	2.54MB	fineweb-2, mala
bgt_Latn	434	32	466	144996	10691	155687	1.06MB	100.01KB	1.16MB	fineweb-2
quf_Latn	434	32	466	255875	19177	275052	2.67MB	166.99KB	2.83MB	fineweb-2, mala
chz_Latn	430	49	479	257775	29374	287149	2.66MB	222.40KB	2.88MB	fineweb-2
gux_Latn	430	41	471	274956	26216	301172	1.52MB	203.84KB	1.72MB	fineweb-2
bnp_Latn	429	40	469	403532	37625	441157	2.17MB	182.40KB	2.34MB	fineweb-2
yal_Latn	428	33	461	296718	22877	319595	1.82MB	136.40KB	1.96MB	fineweb-2
gof_Latn	427	30	457	201988	15228	217216	1.63MB	291.21KB	1.92MB	fineweb-2, mala
npy_Latn	426	25	451	205132	12038	217170	1.55MB	154.03KB	1.70MB	fineweb-2
kkj_Latn	425	47	472	251709	27836	279545	1.67MB	206.43KB	1.88MB	fineweb-2
bmr_Latn	424	36	460	179150	15210	194360	2.13MB	186.56KB	2.31MB	fineweb-2
xla_Latn	424	35	459	408975	33759	442734	3.11MB	257.88KB	3.36MB	fineweb-2
zyb_Latn	424	34	458	162000	12990	174990	1.45MB	244.09KB	1.69MB	fineweb-2
ljp_Latn	422	37	459	219688	19261	238949	1.60MB	211.42KB	1.81MB	fineweb-2
guh_Latn	421	55	476	368159	48386	416545	3.72MB	542.42KB	4.25MB	fineweb-2, mala
nod_Thai	421	43	464	20977	2131	23108	2.21MB	173.41KB	2.38MB	fineweb-2, mala
dwr_Latn	420	44	464	460201	53680	513881	2.94MB	2.14MB	5.08MB	fineweb-2, mala
nrf_Latn	418	36	454	120604	10387	130991	870.28KB	390.62KB	1.23MB	fineweb-2
gfk_Latn	413	33	446	419290	33502	452792	2.43MB	132.71KB	2.56MB	fineweb-2
dob_Latn	410	37	447	312059	28161	340220	2.52MB	242.26KB	2.76MB	fineweb-2
gdr_Latn	408	37	445	34502	3128	37630	426.64KB	110.53KB	537.17KB	fineweb-2
xrb_Latn	402	33	435	182709	14998	197707	1.09MB	108.50KB	1.20MB	fineweb-2
kao_Latn	400	34	434	107594	9145	116739	779.00KB	94.94KB	873.94KB	fineweb-2
njo_Latn	400	35	435	336720	29463	366183	2.71MB	125.46KB	2.83MB	fineweb-2
gai_Latn	396	28	424	116296	8222	124518	958.53KB	191.19KB	1.12MB	fineweb-2
ura_Latn	395	53	448	188109	25240	213349	1.85MB	175.30KB	2.02MB	fineweb-2
irk_Latn	394	45	439	195268	22302	217570	1.60MB	160.94KB	1.76MB	fineweb-2
agr_Latn	393	42	435	215730	23486	239216	1.86MB	350.19KB	2.21MB	fineweb-2, mala
pjt_Latn	393	50	443	281208	35777	316985	2.79MB	364.74KB	3.14MB	fineweb-2
ztq_Latn	393	20	413	186839	9508	196347	1.27MB	60.72KB	1.33MB	fineweb-2
aai_Latn	392	32	424	236207	19282	255489	1.81MB	95.52KB	1.91MB	fineweb-2
kez_Latn	392	31	423	177679	14051	191730	1.62MB	105.24KB	1.72MB	fineweb-2
bgr_Latn	391	42	433	219648	23594	243242	1.32MB	171.09KB	1.48MB	fineweb-2
aii_Syrc	390	30	420	354419	27263	381682	4.75MB	916.42KB	5.64MB	fineweb-2
spp_Latn	389	52	441	287132	42278	329410	1.61MB	381.44KB	1.98MB	fineweb-2, mala
gde_Latn	387	24	411	218406	13544	231950	1.62MB	95.13KB	1.71MB	fineweb-2
gvl_Latn	385	51	436	356735	50737	407472	2.09MB	219.72KB	2.31MB	fineweb-2, mala
dgi_Latn	384	39	423	165036	16761	181797	1023.39KB	149.99KB	1.15MB	fineweb-2
ifk_Latn	383	37	420	197064	19037	216101	1.44MB	81.29KB	1.52MB	fineweb-2
knc_Latn	383	54	437	103084	14534	117618	795.54KB	403.53KB	1.17MB	fineweb-2
ntu_Latn	382	45	427	180167	21223	201390	1.38MB	203.67KB	1.58MB	fineweb-2
kmh_Latn	381	40	421	387937	40728	428665	2.52MB	197.16KB	2.71MB	fineweb-2
men_Latn	380	30	410	196311	15498	211809	1.25MB	135.38KB	1.38MB	fineweb-2
dad_Latn	379	43	422	335247	38036	373283	2.17MB	225.38KB	2.39MB	fineweb-2
dnj_Latn	378	32	410	293474	24844	318318	2.01MB	126.92KB	2.13MB	fineweb-2
bhl_Latn	377	37	414	459892	45135	505027	2.96MB	224.34KB	3.18MB	fineweb-2
rnd_Latn	377	51	428	102356	13846	116202	919.32KB	173.47KB	1.07MB	fineweb-2
cpa_Latn	376	62	438	98539	16248	114787	1.15MB	185.34KB	1.34MB	fineweb-2
itv_Latn	376	28	404	231473	17237	248710	1.56MB	88.06KB	1.65MB	fineweb-2
aji_Latn	375	43	418	172691	19802	192493	1.19MB	181.10KB	1.37MB	fineweb-2
sdc_Latn	375	70	445	211790	39534	251324	954.96KB	1.08MB	2.02MB	fineweb-2
zpz_Latn	374	60	434	348432	55898	404330	1.97MB	227.47KB	2.19MB	fineweb-2
cce_Latn	373	27	400	371166	37855	409021	2.31MB	147.11KB	2.45MB	fineweb-2, mala
lgg_Latn	372	35	407	267517	24004	291521	1.58MB	199.11KB	1.78MB	fineweb-2, mala
miq_Latn	371	38	409	331614	39372	370986	1.81MB	528.33KB	2.33MB	fineweb-2, mala
isd_Latn	368	40	408	183480	19943	203423	1.27MB	145.71KB	1.41MB	fineweb-2
noa_Latn	368	50	418	381497	51978	433475	2.81MB	512.92KB	3.31MB	fineweb-2, mala
tac_Latn	368	33	401	226855	20342	247197	1.87MB	179.37KB	2.05MB	fineweb-2
fai_Latn	365	25	390	66357	4545	70902	561.53KB	103.34KB	664.87KB	fineweb-2
kwi_Latn	363	37	400	201393	20787	222180	1.68MB	190.28KB	1.87MB	fineweb-2, mala
pah_Latn	363	33	396	152786	13889	166675	1.23MB	224.01KB	1.45MB	fineweb-2
snd_Deva	361	37	398	530572	54379	584951	3.49MB	617.26KB	4.09MB	fineweb-2
sue_Latn	361	28	389	205929	15972	221901	1.37MB	136.22KB	1.50MB	fineweb-2
rmn_Cyrl	360	52	412	105724	15271	120995	1.22MB	350.08KB	1.56MB	fineweb-2
cbu_Latn	358	19	377	216920	11512	228432	2.25MB	112.22KB	2.36MB	fineweb-2
mdf_Latn	358	51	409	42869	6107	48976	595.07KB	131.18KB	726.25KB	mala
spy_Latn	355	37	392	159698	16644	176342	1.71MB	109.95KB	1.82MB	fineweb-2
eza_Latn	353	23	376	111174	7243	118417	936.30KB	100.28KB	1.01MB	fineweb-2
ozm_Latn	353	34	387	192145	18521	210666	1.38MB	159.40KB	1.54MB	fineweb-2, mala
gub_Latn	351	53	404	352715	52823	405538	2.08MB	434.09KB	2.50MB	fineweb-2, mala
cwt_Latn	350	42	392	180436	21652	202088	1.43MB	145.89KB	1.58MB	fineweb-2
kdr_Latn	349	32	381	469002	43003	512005	1.98MB	2.50MB	4.48MB	fineweb-2
biv_Latn	348	36	384	173870	17986	191856	1.07MB	118.05KB	1.19MB	fineweb-2
sml_Latn	348	49	397	164882	23303	188185	1.34MB	153.34KB	1.49MB	fineweb-2, mala
niv_Cyrl	348	109	457	6912	2165	9077	191.12KB	108.02KB	299.15KB	mala
akp_Latn	347	34	381	161552	15829	177381	1.16MB	89.82KB	1.25MB	fineweb-2
cas_Latn	347	42	389	165545	20037	185582	1.41MB	170.57KB	1.57MB	fineweb-2
ncx_Latn	347	60	407	83900	14507	98407	880.46KB	211.89KB	1.07MB	fineweb-2
cbv_Latn	346	32	378	150475	13916	164391	1.25MB	160.55KB	1.40MB	fineweb-2
enb_Latn	346	44	390	160370	20380	180750	1.67MB	127.32KB	1.80MB	fineweb-2, mala
dts_Latn	344	37	381	110811	11918	122729	876.43KB	108.39KB	984.81KB	fineweb-2
fal_Latn	344	36	380	162241	16978	179219	1.03MB	93.89KB	1.12MB	fineweb-2
nhu_Latn	344	56	400	1109165	180561	1289726	4.79MB	4.27MB	9.06MB	fineweb-2
mmo_Latn	343	37	380	153400	16547	169947	1001.99KB	270.91KB	1.24MB	fineweb-2
yss_Latn	342	47	389	346215	47579	393794	2.30MB	191.78KB	2.49MB	fineweb-2
gbo_Latn	341	29	370	219777	18690	238467	1.36MB	102.44KB	1.46MB	fineweb-2
kgp_Latn	340	36	376	136680	14472	151152	850.54KB	178.88KB	1.01MB	fineweb-2
bdd_Latn	339	30	369	221693	19618	241311	1.86MB	179.58KB	2.04MB	fineweb-2
bmh_Latn	339	22	361	335424	21767	357191	2.10MB	155.76KB	2.25MB	fineweb-2
sda_Latn	338	34	372	233646	23594	257240	1.72MB	329.23KB	2.04MB	fineweb-2, mala
mfi_Latn	336	34	370	206854	20931	227785	1.43MB	133.91KB	1.56MB	fineweb-2
boj_Latn	333	36	369	356850	38578	395428	2.31MB	278.33KB	2.58MB	fineweb-2
sas_Latn	331	62	393	203282	38076	241358	1.51MB	383.13KB	1.88MB	fineweb-2
vmy_Latn	331	34	365	146153	15012	161165	1.22MB	125.89KB	1.34MB	fineweb-2
blh_Latn	330	38	368	242155	27884	270039	1.77MB	119.11KB	1.89MB	fineweb-2
agn_Latn	329	38	367	252690	29186	281876	1.71MB	165.26KB	1.87MB	fineweb-2
dgc_Latn	329	24	353	139222	10156	149378	937.01KB	96.84KB	1.01MB	fineweb-2
gbi_Latn	329	23	352	271837	19003	290840	1.74MB	108.15KB	1.85MB	fineweb-2
qvh_Latn	326	17	343	144247	7561	151808	1.62MB	101.28KB	1.72MB	fineweb-2, mala
bpr_Latn	324	28	352	204027	17632	221659	1.24MB	83.89KB	1.32MB	fineweb-2
byr_Latn	324	54	378	266300	44383	310683	2.91MB	366.58KB	3.27MB	fineweb-2
ibg_Latn	324	44	368	80437	10923	91360	676.86KB	206.69KB	883.55KB	fineweb-2
sxn_Latn	322	38	360	224646	25713	250359	1.80MB	282.79KB	2.07MB	fineweb-2, mala
bku_Latn	321	64	385	624191	124449	748640	1.51MB	4.09MB	5.60MB	fineweb-2
udu_Latn	321	29	350	74155	6699	80854	578.46KB	77.09KB	655.55KB	fineweb-2
hay_Latn	319	19	338	128929	7679	136608	1.25MB	52.03KB	1.30MB	fineweb-2
car_Latn	318	36	354	167657	18980	186637	1.43MB	125.43KB	1.55MB	fineweb-2
sgb_Latn	318	35	353	160789	17697	178486	1.09MB	193.73KB	1.28MB	fineweb-2
avu_Latn	317	33	350	418949	43613	462562	3.13MB	165.42KB	3.29MB	fineweb-2
trn_Latn	317	18	335	183658	10428	194086	1.58MB	32.78KB	1.61MB	fineweb-2
tdx_Latn	316	39	355	259671	29987	289658	1.56MB	557.08KB	2.11MB	fineweb-2, mala
sua_Latn	315	49	364	511230	79524	590754	2.38MB	516.13KB	2.88MB	fineweb-2
urk_Thai	315	31	346	164805	16218	181023	2.39MB	253.39KB	2.64MB	fineweb-2
yli_Latn	315	34	349	202630	21871	224501	1.44MB	134.92KB	1.58MB	fineweb-2
dyi_Latn	313	45	358	319184	45889	365073	1.75MB	317.31KB	2.06MB	fineweb-2
heh_Latn	313	41	354	150192	19673	169865	1.32MB	156.29KB	1.47MB	fineweb-2
nhi_Latn	312	34	346	173171	18871	192042	1.56MB	188.53KB	1.75MB	fineweb-2
grt_Beng	311	28	339	145523	13101	158624	3.17MB	150.36KB	3.32MB	fineweb-2
gnn_Latn	310	38	348	268799	32949	301748	2.73MB	371.67KB	3.10MB	fineweb-2
mqb_Latn	310	27	337	278365	24244	302609	1.60MB	86.04KB	1.68MB	fineweb-2
tbg_Latn	310	26	336	274941	23059	298000	2.10MB	176.11KB	2.27MB	fineweb-2
mwv_Latn	307	22	329	158688	11371	170059	1.27MB	126.53KB	1.40MB	fineweb-2
gul_Latn	306	57	363	376997	70225	447222	1.59MB	731.54KB	2.31MB	fineweb-2
lex_Latn	306	12	318	250617	9828	260445	1.73MB	104.35KB	1.83MB	fineweb-2
qvz_Latn	305	25	330	177772	14474	192246	1.68MB	146.60KB	1.82MB	fineweb-2, mala
thk_Latn	305	35	340	147091	16926	164017	1.30MB	128.76KB	1.43MB	fineweb-2, mala
cjs_Cyrl	304	44	348	16661	4064	20725	329.04KB	87.97KB	417.01KB	fineweb-2, mala
rap_Latn	304	58	362	30900	7415	38315	236.03KB	80.82KB	316.85KB	fineweb-2, mala
mxt_Latn	303	22	325	111339	8084	119423	820.46KB	53.12KB	873.58KB	fineweb-2
pir_Latn	303	39	342	226747	29185	255932	1.90MB	193.53KB	2.09MB	fineweb-2
qup_Latn	301	42	343	106115	15855	121970	1.11MB	229.73KB	1.33MB	fineweb-2, mala
bxr_Latn	301	40	341	24311	3230	27541	364.62KB	158.33KB	522.96KB	mala
qvm_Latn	300	23	323	135169	10394	145563	1.56MB	81.65KB	1.64MB	fineweb-2, mala
tsc_Latn	300	42	342	173801	28496	202297	1.09MB	232.32KB	1.32MB	fineweb-2, mala
bao_Latn	299	31	330	219245	22731	241976	1.67MB	136.01KB	1.80MB	fineweb-2
myy_Latn	296	35	331	242359	28657	271016	1.90MB	152.39KB	2.05MB	fineweb-2
xbi_Latn	296	18	314	124185	7551	131736	925.31KB	99.39KB	1.00MB	fineweb-2
mop_Latn	295	30	325	272349	27696	300045	1.63MB	125.37KB	1.76MB	fineweb-2
sbe_Latn	295	26	321	165120	14553	179673	1.24MB	99.49KB	1.34MB	fineweb-2
dyo_Latn	294	25	319	150986	12838	163824	1.30MB	77.70KB	1.37MB	fineweb-2
lam_Latn	294	27	321	136096	12498	148594	1.28MB	72.17KB	1.35MB	fineweb-2
tcf_Latn	294	19	313	104152	6730	110882	942.79KB	172.12KB	1.09MB	fineweb-2
kyc_Latn	293	34	327	220672	25606	246278	1.59MB	183.41KB	1.77MB	fineweb-2
kqn_Latn	292	47	339	134741	21687	156428	1.11MB	205.82KB	1.31MB	fineweb-2
nho_Latn	292	16	308	238250	13054	251304	1.37MB	83.97KB	1.45MB	fineweb-2
sat_Latn	292	16	308	300198	16449	316647	925.24KB	66.64KB	991.88KB	fineweb-2
jae_Latn	291	40	331	228696	31436	260132	1.64MB	228.91KB	1.86MB	fineweb-2
dua_Latn	290	39	329	128651	17301	145952	877.75KB	163.53KB	1.02MB	fineweb-2
mpp_Latn	290	20	310	169573	11694	181267	1.54MB	63.73KB	1.61MB	fineweb-2
ntr_Latn	290	31	321	233543	24965	258508	1.49MB	98.47KB	1.59MB	fineweb-2
tuo_Latn	290	37	327	221618	28275	249893	2.04MB	258.86KB	2.30MB	fineweb-2
ipk_Latn	289	42	331	9326	1355	10681	168.63KB	58.16KB	226.79KB	mala
mni_Latn	288	26	314	723877	65350	789227	1.73MB	398.04KB	2.12MB	fineweb-2
pma_Latn	288	13	301	243689	10999	254688	1.53MB	101.71KB	1.63MB	fineweb-2
tee_Latn	287	39	326	232848	31641	264489	1.89MB	175.44KB	2.06MB	fineweb-2
kpe_Latn	286	28	314	143789	14077	157866	941.70KB	338.72KB	1.25MB	fineweb-2
kpy_Cyrl	286	58	344	6233	1264	7497	172.18KB	53.66KB	225.85KB	mala
izz_Latn	284	27	311	223081	21134	244215	1.55MB	158.33KB	1.71MB	fineweb-2, mala
coe_Latn	283	33	316	161521	18834	180355	1.74MB	150.87KB	1.89MB	fineweb-2
kki_Latn	283	25	308	122535	10824	133359	1.11MB	46.93KB	1.16MB	fineweb-2
soq_Latn	283	11	294	177268	6890	184158	1.35MB	24.29KB	1.37MB	fineweb-2
atb_Latn	282	31	313	144505	15885	160390	930.45KB	135.34KB	1.04MB	fineweb-2
cbc_Latn	282	30	312	253066	26921	279987	2.38MB	202.61KB	2.58MB	fineweb-2
smk_Latn	281	37	318	176335	23218	199553	1.25MB	183.54KB	1.43MB	fineweb-2
hig_Latn	280	22	302	153185	12039	165224	1.10MB	80.49KB	1.17MB	fineweb-2, mala
nct_Latn	280	49	329	56069	9812	65881	506.23KB	152.78KB	659.01KB	fineweb-2
swg_Latn	280	32	312	686209	78423	764632	3.01MB	1.40MB	4.41MB	fineweb-2
mlp_Latn	279	17	296	272802	16622	289424	1.66MB	68.51KB	1.73MB	fineweb-2
bss_Latn	278	57	335	166043	34097	200140	1.53MB	204.73KB	1.73MB	fineweb-2, mala
knf_Latn	277	34	311	139905	17172	157077	1.04MB	116.57KB	1.16MB	fineweb-2
kbr_Latn	275	29	304	139189	14678	153867	1.31MB	128.83KB	1.43MB	fineweb-2
aey_Latn	274	19	293	261706	18147	279853	1.76MB	103.24KB	1.86MB	fineweb-2
bmu_Latn	274	29	303	227312	24058	251370	1.94MB	209.42KB	2.14MB	fineweb-2
sig_Latn	274	26	300	242684	23028	265712	1.44MB	137.94KB	1.58MB	fineweb-2
zpl_Latn	274	27	301	220886	21766	242652	1.49MB	175.71KB	1.66MB	fineweb-2
qvs_Latn	273	20	293	147849	10831	158680	1.62MB	89.30KB	1.71MB	fineweb-2
qxr_Latn	273	29	302	131044	13925	144969	1.35MB	147.12KB	1.49MB	fineweb-2, mala
gkn_Latn	270	51	321	85658	16179	101837	728.24KB	110.49KB	838.73KB	fineweb-2
dww_Latn	269	15	284	247737	13814	261551	1.81MB	89.59KB	1.90MB	fineweb-2
kpr_Latn	268	32	300	233252	27851	261103	1.65MB	141.22KB	1.78MB	fineweb-2
mxp_Latn	268	29	297	130830	14156	144986	1.28MB	165.26KB	1.44MB	fineweb-2
ory_Latn	268	31	299	132810	15362	148172	1.01MB	194.26KB	1.20MB	fineweb-2
ted_Latn	268	14	282	82961	4333	87294	630.46KB	56.84KB	687.30KB	fineweb-2
kto_Latn	267	13	280	240219	11696	251915	1.48MB	81.02KB	1.56MB	fineweb-2
yuj_Latn	266	15	281	283233	15971	299204	2.01MB	114.50KB	2.12MB	fineweb-2
lef_Latn	265	31	296	164162	19203	183365	1.25MB	56.25KB	1.31MB	fineweb-2
sim_Latn	265	15	280	225866	12784	238650	1.45MB	87.30KB	1.53MB	fineweb-2
cnt_Latn	264	47	311	161711	28789	190500	2.02MB	193.01KB	2.20MB	fineweb-2
knc_Arab	263	27	290	884822	90837	975659	4.94MB	5.23MB	10.17MB	fineweb-2
viv_Latn	262	21	283	253839	20345	274184	2.13MB	183.80KB	2.31MB	fineweb-2
ziw_Latn	262	19	281	129849	9416	139265	1.06MB	62.58KB	1.12MB	fineweb-2
qvw_Latn	260	22	282	95637	8092	103729	1.17MB	100.35KB	1.27MB	fineweb-2
sur_Latn	259	33	292	192336	24506	216842	1.17MB	114.79KB	1.28MB	fineweb-2
cre_Latn	257	37	294	149969	21590	171559	1.34MB	400.96KB	1.73MB	mala
gog_Latn	254	29	283	149389	17056	166445	1.16MB	87.86KB	1.25MB	fineweb-2
lue_Latn	253	26	279	111055	8990	120045	1012.46KB	173.00KB	1.16MB	fineweb-2, mala
orv_Cyrl	252	49	301	16392	3187	19579	393.85KB	103.40KB	497.25KB	fineweb-2
suk_Latn	252	35	287	140846	19562	160408	1.05MB	144.48KB	1.19MB	fineweb-2
vun_Latn	252	18	270	156101	11150	167251	1.13MB	238.94KB	1.36MB	fineweb-2
niv_Latn	252	60	312	10167	2420	12587	351.01KB	87.03KB	438.04KB	mala
kde_Latn	251	20	271	87304	6956	94260	853.98KB	61.01KB	914.99KB	fineweb-2
mcq_Latn	250	30	280	170510	20461	190971	1.36MB	170.05KB	1.52MB	fineweb-2
cya_Latn	249	26	275	218500	22815	241315	1.32MB	80.53KB	1.40MB	fineweb-2
cax_Latn	248	38	286	134535	20614	155149	1.25MB	220.63KB	1.46MB	fineweb-2
chd_Latn	248	13	261	190861	10033	200894	1.32MB	572.18KB	1.87MB	fineweb-2, mala
kus_Latn	248	21	269	136220	11534	147754	822.80KB	62.87KB	885.67KB	fineweb-2
lid_Latn	248	19	267	299329	22932	322261	1.74MB	151.72KB	1.89MB	fineweb-2
koi_Latn	248	32	280	31053	4006	35059	349.10KB	152.15KB	501.25KB	mala
blz_Latn	245	27	272	168370	18555	186925	1.27MB	99.88KB	1.37MB	fineweb-2
tos_Latn	245	22	267	198306	17807	216113	2.30MB	163.97KB	2.46MB	fineweb-2
alq_Latn	244	40	284	125818	20625	146443	1.11MB	216.37KB	1.32MB	fineweb-2
knk_Latn	244	31	275	162896	20695	183591	1.03MB	79.41KB	1.11MB	fineweb-2
kpz_Latn	244	27	271	146831	16247	163078	1.09MB	151.20KB	1.24MB	fineweb-2
taj_Deva	244	29	273	160063	20281	180344	2.88MB	400.20KB	3.27MB	fineweb-2, mala
chw_Latn	243	41	284	73084	12331	85415	615.54KB	304.30KB	919.84KB	fineweb-2
tpt_Latn	243	31	274	126606	16151	142757	1.18MB	75.11KB	1.26MB	fineweb-2
yon_Latn	243	18	261	220262	16315	236577	1.57MB	123.44KB	1.69MB	fineweb-2
bfo_Latn	239	73	312	53647	16386	70033	385.62KB	158.84KB	544.46KB	fineweb-2
guj_Latn	239	20	259	656504	54937	711441	904.19KB	950.03KB	1.81MB	fineweb-2
myw_Latn	239	18	257	154513	11637	166150	1.29MB	76.21KB	1.37MB	fineweb-2
zia_Latn	238	12	250	245100	12358	257458	1.60MB	84.92KB	1.68MB	fineweb-2
bfd_Latn	237	26	263	227719	25045	252764	1.50MB	96.87KB	1.60MB	fineweb-2, mala
mil_Latn	237	35	272	155224	22923	178147	1.38MB	168.52KB	1.55MB	fineweb-2
nyo_Latn	237	13	250	106792	5880	112672	1021.19KB	59.97KB	1.06MB	fineweb-2, mala
bgs_Latn	236	24	260	94358	9595	103953	677.03KB	122.09KB	799.12KB	fineweb-2
cle_Latn	236	25	261	126172	13365	139537	1.54MB	37.24KB	1.58MB	fineweb-2
hag_Latn	236	34	270	149591	21551	171142	898.77KB	83.87KB	982.64KB	fineweb-2
kdi_Latn	236	19	255	130842	10533	141375	700.83KB	243.67KB	944.51KB	fineweb-2
sgw_Ethi	236	17	253	106968	7705	114673	1.38MB	131.95KB	1.51MB	fineweb-2
esu_Latn	235	21	256	71464	6386	77850	967.67KB	125.18KB	1.07MB	fineweb-2
lfn_Cyrl	235	22	257	542145	50754	592899	1.57MB	451.34KB	2.01MB	fineweb-2
gun_Latn	234	18	252	145570	11197	156767	1022.77KB	116.09KB	1.11MB	fineweb-2
lsm_Latn	234	26	260	131230	14581	145811	1.17MB	81.86KB	1.25MB	fineweb-2
cpy_Latn	233	13	246	85001	4742	89743	1.14MB	62.13KB	1.20MB	fineweb-2
ota_Arab	233	27	260	69447	8047	77494	869.87KB	234.90KB	1.08MB	fineweb-2
mux_Latn	232	19	251	307270	25164	332434	2.32MB	171.18KB	2.49MB	fineweb-2
mfz_Latn	231	22	253	147420	14039	161459	1.08MB	71.86KB	1.15MB	fineweb-2
aso_Latn	229	27	256	270781	31926	302707	1.91MB	294.98KB	2.20MB	fineweb-2
otw_Latn	229	22	251	75204	7224	82428	718.79KB	275.31KB	994.10KB	fineweb-2
crx_Latn	227	12	239	169341	8951	178292	1.27MB	105.36KB	1.37MB	fineweb-2
ojb_Cans	227	22	249	115470	11190	126660	1.91MB	166.77KB	2.08MB	fineweb-2
xsi_Latn	227	26	253	284324	32565	316889	1.83MB	198.00KB	2.02MB	fineweb-2
bud_Latn	226	16	242	158214	11238	169452	1.10MB	79.55KB	1.18MB	fineweb-2, mala
cjp_Latn	224	26	250	121369	14087	135456	912.10KB	67.01KB	979.11KB	fineweb-2
bhp_Latn	223	31	254	89538	12446	101984	593.60KB	306.12KB	899.72KB	fineweb-2
bjr_Latn	223	19	242	182623	15621	198244	1.68MB	169.47KB	1.85MB	fineweb-2, mala
kix_Latn	223	15	238	103406	6955	110361	839.10KB	68.98KB	908.08KB	fineweb-2
bzh_Latn	217	24	241	230218	25462	255680	1.44MB	172.54KB	1.61MB	fineweb-2
ktu_Latn	217	30	247	184729	25083	209812	799.30KB	503.01KB	1.27MB	fineweb-2, mala
cco_Latn	216	33	249	268799	41066	309865	2.26MB	414.85KB	2.67MB	fineweb-2
ipi_Latn	215	26	241	220758	26696	247454	1.65MB	165.28KB	1.81MB	fineweb-2
kud_Latn	215	20	235	139533	12979	152512	1.16MB	113.44KB	1.28MB	fineweb-2
msb_Latn	215	45	260	91259	20425	111684	637.05KB	226.80KB	863.85KB	fineweb-2, mala
mxq_Latn	215	43	258	96275	19255	115530	1.00MB	116.58KB	1.12MB	fineweb-2
sbl_Latn	213	16	229	176455	13254	189709	1.22MB	83.89KB	1.30MB	fineweb-2
kyz_Latn	212	19	231	154880	13880	168760	1.09MB	163.93KB	1.25MB	fineweb-2
kij_Latn	210	12	222	127619	7292	134911	1.09MB	68.10KB	1.15MB	fineweb-2
ter_Latn	210	13	223	144573	8949	153522	1.34MB	85.16KB	1.42MB	fineweb-2
toh_Latn	210	24	234	125466	14339	139805	854.52KB	109.65KB	964.18KB	fineweb-2
amm_Latn	209	16	225	207232	15864	223096	1.45MB	107.21KB	1.55MB	fineweb-2
esk_Latn	209	26	235	152770	19004	171774	1.19MB	1.18MB	2.37MB	fineweb-2
mtp_Latn	209	23	232	141670	15590	157260	1.29MB	162.41KB	1.45MB	fineweb-2
kxc_Ethi	208	43	251	112413	23239	135652	1.23MB	298.91KB	1.52MB	fineweb-2
cko_Latn	207	31	238	173518	25985	199503	940.04KB	118.55KB	1.03MB	fineweb-2
mmx_Latn	207	30	237	176234	25541	201775	1.05MB	169.88KB	1.22MB	fineweb-2
ngl_Latn	207	15	222	54549	3733	58282	535.63KB	70.78KB	606.41KB	fineweb-2, mala
usp_Latn	207	21	228	185408	18809	204217	1.32MB	139.65KB	1.46MB	fineweb-2
tcs_Latn	206	23	229	96355	10758	107113	613.34KB	101.35KB	714.69KB	fineweb-2
tiy_Latn	206	24	230	91832	10698	102530	677.41KB	109.24KB	786.66KB	fineweb-2
gnd_Latn	205	33	238	283668	45663	329331	1.49MB	298.61KB	1.79MB	fineweb-2
ken_Latn	205	37	242	94180	17014	111194	790.06KB	173.17KB	963.23KB	fineweb-2, mala
txu_Latn	205	39	244	247417	47069	294486	1.49MB	284.61KB	1.77MB	fineweb-2
kzj_Latn	204	21	225	80433	8279	88712	655.53KB	93.28KB	748.80KB	fineweb-2
lmp_Latn	204	26	230	113049	14432	127481	708.99KB	101.89KB	810.87KB	fineweb-2, mala
byx_Latn	199	34	233	250049	42721	292770	1.39MB	195.86KB	1.58MB	fineweb-2
gng_Latn	199	24	223	125410	15124	140534	804.18KB	68.58KB	872.76KB	fineweb-2
ccp_Latn	197	32	229	46867	7612	54479	401.59KB	185.19KB	586.78KB	fineweb-2
pab_Latn	197	24	221	99745	12151	111896	1015.40KB	80.93KB	1.07MB	fineweb-2
lia_Latn	196	20	216	178074	18170	196244	1.13MB	131.28KB	1.25MB	fineweb-2
nop_Latn	196	16	212	144851	11824	156675	1.18MB	116.64KB	1.29MB	fineweb-2
pad_Latn	195	26	221	165698	22093	187791	1.48MB	236.38KB	1.71MB	fineweb-2
taq_Tfng	195	13	208	67386	4492	71878	1006.25KB	164.80KB	1.14MB	fineweb-2
buk_Latn	194	32	226	185382	30578	215960	1.14MB	213.90KB	1.35MB	fineweb-2
kca_Cyrl	194	24	218	89747	11102	100849	1.05MB	201.05KB	1.25MB	fineweb-2
hvn_Latn	193	33	226	212600	34195	246795	1.06MB	290.71KB	1.35MB	fineweb-2, mala
kss_Latn	193	21	214	78755	8569	87324	562.63KB	108.11KB	670.74KB	fineweb-2
mhl_Latn	193	11	204	177797	10133	187930	1.21MB	65.86KB	1.27MB	fineweb-2
kas_Cyrl	193	25	218	8206	1063	9269	203.25KB	34.67KB	237.92KB	mala
tby_Latn	192	15	207	152527	11916	164443	1.12MB	63.75KB	1.18MB	fineweb-2
khz_Latn	191	21	212	148024	16274	164298	1.15MB	138.15KB	1.28MB	fineweb-2
yle_Latn	191	21	212	187319	20595	207914	1.24MB	102.54KB	1.34MB	fineweb-2
ain_Latn	190	20	210	62553	6584	69137	496.31KB	136.41KB	632.72KB	fineweb-2
ifb_Latn	189	16	205	147762	12508	160270	1002.73KB	81.79KB	1.06MB	fineweb-2
kmo_Latn	189	14	203	169237	12536	181773	1.14MB	85.34KB	1.22MB	fineweb-2
meq_Latn	189	29	218	197042	30233	227275	1.14MB	168.70KB	1.30MB	fineweb-2
yml_Latn	189	20	209	181391	19194	200585	1.37MB	185.92KB	1.56MB	fineweb-2
niq_Latn	189	29	218	84163	12914	97077	607.45KB	155.69KB	763.14KB	mala
pem_Latn	187	23	210	44109	5425	49534	417.10KB	78.46KB	495.56KB	fineweb-2
dah_Latn	186	30	216	227675	36721	264396	1.49MB	250.59KB	1.73MB	fineweb-2
kze_Latn	185	22	207	122095	14519	136614	1.37MB	168.20KB	1.53MB	fineweb-2
mva_Latn	184	19	203	168205	17369	185574	1.25MB	122.97KB	1.37MB	fineweb-2
dgr_Latn	183	19	202	93045	9660	102705	964.07KB	84.71KB	1.02MB	fineweb-2
zpv_Latn	183	17	200	160966	14953	175919	1.19MB	130.89KB	1.32MB	fineweb-2
aly_Latn	182	22	204	100826	12187	113013	1.12MB	149.24KB	1.27MB	fineweb-2
cso_Latn	182	24	206	177583	23417	201000	1.58MB	200.36KB	1.78MB	fineweb-2
gaw_Latn	182	25	207	176608	24259	200867	1.14MB	183.41KB	1.32MB	fineweb-2
snp_Latn	182	19	201	179087	18695	197782	1.22MB	100.88KB	1.31MB	fineweb-2
kmu_Latn	181	23	204	155949	18141	174090	1.31MB	156.81KB	1.46MB	fineweb-2, mala
ksr_Latn	181	20	201	170185	18804	188989	1.33MB	188.03KB	1.51MB	fineweb-2
opm_Latn	180	23	203	199532	25495	225027	1.29MB	105.51KB	1.39MB	fineweb-2
bch_Latn	178	35	213	170231	33472	203703	1.01MB	249.49KB	1.26MB	fineweb-2
xtn_Latn	178	25	203	191535	26901	218436	840.39KB	400.39KB	1.21MB	fineweb-2
wuv_Latn	177	13	190	132938	9763	142701	933.82KB	61.75KB	995.57KB	fineweb-2
rki_Mymr	177	28	205	73302	11595	84897	2.34MB	800.07KB	3.12MB	mala
heg_Latn	175	10	185	185255	10586	195841	1.00MB	88.23KB	1.09MB	fineweb-2
muh_Latn	175	23	198	256339	33690	290029	1.30MB	185.59KB	1.48MB	fineweb-2
kpw_Latn	174	21	195	198821	23995	222816	1.23MB	131.18KB	1.35MB	fineweb-2
leu_Latn	174	10	184	176847	10163	187010	1.00MB	88.81KB	1.09MB	fineweb-2
ogo_Latn	174	15	189	62473	5385	67858	353.57KB	171.63KB	525.20KB	fineweb-2
rro_Latn	174	12	186	129342	8920	138262	964.01KB	102.50KB	1.04MB	fineweb-2
rtm_Latn	174	29	203	143543	23923	167466	822.23KB	191.81KB	1014.04KB	fineweb-2
kpv_Latn	174	58	232	3154	1051	4205	123.11KB	46.96KB	170.07KB	mala
hrx_Latn	173	26	199	164589	24735	189324	572.75KB	873.60KB	1.41MB	fineweb-2
kwj_Latn	172	26	198	172598	26090	198688	1.14MB	68.73KB	1.21MB	fineweb-2
kpf_Latn	171	13	184	113500	8628	122128	947.31KB	24.85KB	972.16KB	fineweb-2
tte_Latn	171	12	183	133057	9337	142394	1.04MB	143.96KB	1.18MB	fineweb-2
nim_Latn	170	18	188	77034	8156	85190	753.80KB	87.44KB	841.24KB	fineweb-2
yuw_Latn	170	16	186	161230	15174	176404	1.16MB	128.46KB	1.29MB	fineweb-2
aoz_Latn	169	17	186	42393	4264	46657	291.74KB	132.27KB	424.02KB	fineweb-2
akb_Latn	168	26	194	134791	20671	155462	784.02KB	282.03KB	1.04MB	fineweb-2, mala
gnw_Latn	166	15	181	93254	8426	101680	736.86KB	42.42KB	779.28KB	fineweb-2
keo_Latn	165	18	183	142017	15492	157509	853.50KB	114.16KB	967.66KB	fineweb-2
bhw_Latn	163	18	181	24808	2739	27547	286.55KB	29.51KB	316.06KB	fineweb-2
caq_Latn	163	13	176	79862	6369	86231	629.34KB	35.86KB	665.20KB	fineweb-2
ceg_Latn	163	19	182	142264	16582	158846	972.21KB	122.30KB	1.07MB	fineweb-2
mrw_Latn	163	22	185	345579	46511	392090	1.68MB	721.33KB	2.38MB	fineweb-2, mala
ybb_Latn	162	32	194	74490	14692	89182	518.86KB	276.08KB	794.94KB	fineweb-2, mala
crm_Cans	161	19	180	95765	11301	107066	1.22MB	141.60KB	1.36MB	fineweb-2
laj_Latn	161	28	189	148550	24093	172643	702.05KB	273.48KB	975.54KB	fineweb-2, mala
mxv_Latn	161	19	180	77306	9123	86429	584.50KB	163.08KB	747.58KB	fineweb-2
ubr_Latn	161	19	180	135582	16000	151582	946.75KB	66.99KB	1013.74KB	fineweb-2
tvk_Latn	160	41	201	147377	37765	185142	953.11KB	149.78KB	1.08MB	fineweb-2
czt_Latn	159	28	187	43109	7591	50700	406.71KB	114.16KB	520.87KB	fineweb-2
dje_Latn	159	18	177	110598	10358	120956	547.55KB	170.84KB	718.38KB	fineweb-2, mala
zac_Latn	159	26	185	87547	14316	101863	760.29KB	49.57KB	809.86KB	fineweb-2
gso_Latn	158	25	183	136346	21573	157919	787.14KB	131.32KB	918.46KB	fineweb-2
hae_Latn	158	11	169	61667	4293	65960	584.60KB	91.05KB	675.65KB	fineweb-2
kjs_Latn	158	35	193	120048	26592	146640	952.36KB	133.89KB	1.06MB	fineweb-2
xmv_Latn	158	23	181	55791	8121	63912	498.66KB	79.25KB	577.91KB	fineweb-2
nwi_Latn	157	27	184	143258	23468	166726	1.12MB	184.98KB	1.30MB	fineweb-2, mala
ann_Latn	156	22	178	59991	9534	69525	425.96KB	120.95KB	546.91KB	fineweb-2, mala
wrs_Latn	156	14	170	127933	11481	139414	1.03MB	61.19KB	1.09MB	fineweb-2
zpc_Latn	156	22	178	125516	17701	143217	1.05MB	154.63KB	1.21MB	fineweb-2
gld_Cyrl	156	24	180	6563	1009	7572	149.07KB	32.87KB	181.94KB	mala
bzi_Thai	155	24	179	89371	14973	104344	1.48MB	165.17KB	1.64MB	fineweb-2, mala
guk_Ethi	155	21	176	80173	10862	91035	1001.99KB	139.72KB	1.11MB	fineweb-2
nhy_Latn	155	27	182	94279	16422	110701	836.21KB	100.31KB	936.52KB	fineweb-2
mca_Latn	154	16	170	61781	6418	68199	508.18KB	95.54KB	603.71KB	fineweb-2
uvh_Latn	154	15	169	193526	18850	212376	1.28MB	130.04KB	1.40MB	fineweb-2
cap_Latn	153	19	172	80819	10036	90855	858.59KB	85.05KB	943.64KB	fineweb-2
lwg_Latn	153	14	167	36202	3312	39514	295.18KB	134.26KB	429.45KB	fineweb-2
mox_Latn	153	18	171	142528	16768	159296	1.02MB	127.82KB	1.15MB	fineweb-2
dga_Latn	152	16	168	68861	7248	76109	423.92KB	88.76KB	512.68KB	fineweb-2
kak_Latn	152	45	197	13042	5848	18890	126.69KB	67.20KB	193.89KB	fineweb-2, mala
kne_Latn	152	14	166	78409	7221	85630	563.25KB	36.88KB	600.13KB	fineweb-2
lac_Latn	152	26	178	180393	30856	211249	1.13MB	89.17KB	1.21MB	fineweb-2
awx_Latn	151	32	183	71974	15252	87226	661.11KB	172.65KB	833.76KB	fineweb-2
bim_Latn	151	16	167	226191	39701	265892	822.90KB	595.23KB	1.38MB	fineweb-2, mala
mej_Latn	151	27	178	118248	21143	139391	812.45KB	105.64KB	918.10KB	fineweb-2
ppo_Latn	151	17	168	151323	17036	168359	1.11MB	121.81KB	1.22MB	fineweb-2
msm_Latn	151	25	176	161697	26771	188468	973.80KB	176.13KB	1.12MB	mala
uvl_Latn	150	12	162	170264	13621	183885	1002.38KB	82.28KB	1.06MB	fineweb-2
wos_Latn	150	26	176	166950	28938	195888	1.14MB	231.75KB	1.37MB	fineweb-2
rav_Deva	149	20	169	81433	10930	92363	1.60MB	220.95KB	1.81MB	fineweb-2
caf_Latn	148	15	163	107121	10856	117977	859.10KB	82.67KB	941.77KB	fineweb-2
ong_Latn	148	8	156	164656	8900	173556	993.02KB	51.53KB	1.02MB	fineweb-2
aba_Latn	147	15	162	30869	3149	34018	306.24KB	41.21KB	347.45KB	fineweb-2
amn_Latn	147	10	157	127778	8692	136470	856.71KB	56.60KB	913.32KB	fineweb-2
chf_Latn	147	15	162	120230	12268	132498	814.15KB	34.95KB	849.10KB	fineweb-2
mek_Latn	147	12	159	137787	11247	149034	971.83KB	77.02KB	1.02MB	fineweb-2
snc_Latn	147	12	159	114257	9327	123584	1005.03KB	71.85KB	1.05MB	fineweb-2
nin_Latn	146	35	181	89748	21515	111263	605.33KB	188.08KB	793.42KB	fineweb-2
tkl_Latn	146	19	165	59760	7776	67536	361.10KB	101.12KB	462.23KB	fineweb-2
arq_Arab	145	22	167	41883	6354	48237	610.48KB	155.63KB	766.10KB	fineweb-2
npl_Latn	145	17	162	76510	8970	85480	715.09KB	50.12KB	765.21KB	fineweb-2
mic_Latn	144	8	152	79622	4423	84045	707.39KB	73.25KB	780.65KB	fineweb-2
txq_Latn	144	12	156	90259	7521	97780	604.04KB	66.10KB	670.14KB	fineweb-2
crk_Cans	142	12	154	50547	4271	54818	899.40KB	99.81KB	999.21KB	fineweb-2
zaw_Latn	141	33	174	39111	9152	48263	402.67KB	66.19KB	468.85KB	fineweb-2, mala
cdo_Hani	141	21	162	3327	495	3822	298.21KB	92.21KB	390.42KB	mala
abz_Latn	140	16	156	22336	2552	24888	228.86KB	34.62KB	263.49KB	fineweb-2
lew_Latn	140	22	162	26356	4141	30497	277.98KB	42.94KB	320.92KB	fineweb-2
rmq_Latn	140	21	161	53408	8011	61419	402.22KB	142.87KB	545.09KB	fineweb-2
kms_Latn	139	27	166	147141	28717	175858	869.90KB	136.69KB	1006.59KB	fineweb-2, mala
sll_Latn	137	14	151	139491	14254	153745	846.39KB	79.34KB	925.74KB	fineweb-2
nss_Latn	136	33	169	79477	19285	98762	628.02KB	118.92KB	746.93KB	fineweb-2
plu_Latn	135	17	152	72675	9151	81826	571.38KB	96.59KB	667.97KB	fineweb-2
zab_Latn	135	36	171	86145	22972	109117	519.67KB	277.21KB	796.88KB	fineweb-2
zat_Latn	135	28	163	101011	20950	121961	726.33KB	183.85KB	910.18KB	fineweb-2
thl_Deva	135	16	151	13955	1653	15608	256.72KB	31.76KB	288.49KB	mala
mie_Latn	134	18	152	78956	10606	89562	637.86KB	102.80KB	740.66KB	fineweb-2
cek_Latn	133	23	156	88976	15386	104362	536.98KB	149.08KB	686.06KB	fineweb-2
snf_Latn	133	21	154	91206	14401	105607	750.90KB	79.77KB	830.67KB	fineweb-2
bef_Latn	132	18	150	102649	14064	116713	800.75KB	105.83KB	906.58KB	fineweb-2, mala
caa_Latn	132	17	149	123093	15853	138946	891.86KB	66.67KB	958.54KB	fineweb-2
gvf_Latn	131	24	155	149238	27341	176579	905.83KB	167.13KB	1.05MB	fineweb-2
hub_Latn	131	11	142	60612	5089	65701	571.29KB	52.24KB	623.53KB	fineweb-2
mbl_Latn	131	14	145	104538	11172	115710	785.83KB	50.87KB	836.70KB	fineweb-2
kqw_Latn	130	22	152	133357	22568	155925	710.92KB	126.73KB	837.64KB	fineweb-2
mbs_Latn	130	15	145	78631	9072	87703	494.85KB	121.46KB	616.32KB	fineweb-2
nus_Latn	130	22	152	74783	12655	87438	497.01KB	129.26KB	626.27KB	fineweb-2
aia_Latn	129	17	146	102946	13644	116590	713.57KB	109.60KB	823.18KB	fineweb-2, mala
nsn_Latn	129	29	158	95016	20991	116007	726.95KB	83.51KB	810.46KB	fineweb-2, mala
wer_Latn	128	18	146	108760	15294	124054	788.69KB	116.94KB	905.63KB	fineweb-2
agm_Latn	127	18	145	137922	19548	157470	1.40MB	245.51KB	1.64MB	fineweb-2
guo_Latn	127	13	140	81651	8358	90009	811.72KB	85.46KB	897.18KB	fineweb-2
mav_Latn	127	15	142	91900	10854	102754	683.42KB	71.02KB	754.45KB	fineweb-2
prg_Latn	127	9	136	26949	1909	28858	284.00KB	36.08KB	320.08KB	fineweb-2
kpx_Latn	126	8	134	94883	6024	100907	708.58KB	40.02KB	748.60KB	fineweb-2
mqy_Latn	126	24	150	92187	17474	109661	613.09KB	111.41KB	724.50KB	fineweb-2, mala
mti_Latn	124	12	136	90014	8711	98725	635.31KB	55.31KB	690.63KB	fineweb-2
nab_Latn	124	18	142	61337	8903	70240	1.39MB	222.24KB	1.61MB	fineweb-2
otm_Latn	124	12	136	124404	12039	136443	745.26KB	83.43KB	828.69KB	fineweb-2
xuo_Latn	124	10	134	19503	1572	21075	175.58KB	25.94KB	201.52KB	fineweb-2
auy_Latn	123	16	139	87042	11322	98364	983.30KB	127.58KB	1.08MB	fineweb-2
tbo_Latn	123	12	135	95871	9353	105224	662.65KB	76.99KB	739.64KB	fineweb-2
nii_Latn	122	17	139	159988	22293	182281	897.67KB	144.56KB	1.02MB	fineweb-2
row_Latn	122	15	137	117742	14476	132218	789.50KB	103.22KB	892.73KB	fineweb-2
zgh_Tfng	122	12	134	42572	4187	46759	597.23KB	132.10KB	729.33KB	fineweb-2
nog_Latn	122	46	168	3472	1309	4781	277.77KB	91.88KB	369.65KB	mala
nhg_Latn	121	32	153	67272	17790	85062	658.33KB	97.65KB	755.98KB	fineweb-2
ssg_Latn	121	10	131	102724	8489	111213	666.07KB	58.35KB	724.41KB	fineweb-2
kwd_Latn	120	30	150	120755	26656	147411	716.95KB	211.09KB	928.04KB	fineweb-2, mala
oji_Latn	120	15	135	30798	3849	34647	321.23KB	61.82KB	383.05KB	mala
big_Latn	119	20	139	99466	16717	116183	761.89KB	156.15KB	918.04KB	fineweb-2
bjp_Latn	119	36	155	101229	30623	131852	622.86KB	136.37KB	759.23KB	fineweb-2
wnc_Latn	119	13	132	121108	13230	134338	910.10KB	116.88KB	1.00MB	fineweb-2
bqp_Latn	118	23	141	73119	14252	87371	496.57KB	133.72KB	630.29KB	fineweb-2
cto_Latn	118	9	127	49665	3788	53453	513.72KB	41.27KB	555.00KB	fineweb-2
mcd_Latn	118	10	128	87080	7379	94459	705.14KB	101.33KB	806.48KB	fineweb-2
mcp_Latn	118	17	135	47667	6867	54534	356.91KB	87.43KB	444.35KB	fineweb-2
mur_Latn	117	12	129	88489	9075	97564	601.18KB	64.70KB	665.88KB	fineweb-2
ady_Latn	117	48	165	3508	1439	4947	65.51KB	48.12KB	113.63KB	mala
guz_Latn	116	8	124	33336	2299	35635	262.97KB	100.64KB	363.61KB	fineweb-2
mee_Latn	116	14	130	138131	16670	154801	848.84KB	102.09KB	950.93KB	fineweb-2
wls_Latn	116	17	133	33940	4973	38913	241.33KB	87.95KB	329.28KB	fineweb-2
adh_Latn	115	27	142	41561	9979	51540	298.83KB	95.87KB	394.70KB	fineweb-2, mala
emi_Latn	115	10	125	84165	7318	91483	587.24KB	56.38KB	643.62KB	fineweb-2
idu_Latn	115	36	151	56462	17675	74137	256.36KB	304.44KB	560.80KB	fineweb-2
yut_Latn	115	15	130	106666	13913	120579	760.39KB	102.74KB	863.13KB	fineweb-2
chy_Latn	115	20	135	2496	434	2930	62.49KB	17.42KB	79.91KB	mala
lbb_Latn	114	20	134	106267	18643	124910	573.44KB	127.45KB	700.89KB	fineweb-2
apy_Latn	113	19	132	77454	13023	90477	645.16KB	113.39KB	758.56KB	fineweb-2
chr_Latn	113	16	129	83431	12104	95535	86.69KB	274.35KB	361.04KB	fineweb-2, mala
icr_Latn	113	22	135	36912	7186	44098	226.21KB	139.04KB	365.24KB	fineweb-2
aca_Latn	112	20	132	62777	11210	73987	648.04KB	89.10KB	737.14KB	fineweb-2
mqj_Latn	112	14	126	66243	8293	74536	502.33KB	156.44KB	658.77KB	fineweb-2, mala
kxm_Thai	111	18	129	18074	2930	21004	260.94KB	117.69KB	378.63KB	fineweb-2
vid_Latn	111	7	118	68049	4291	72340	520.97KB	56.32KB	577.28KB	fineweb-2
azg_Latn	110	12	122	109889	11987	121876	913.96KB	97.99KB	1011.95KB	fineweb-2
liv_Latn	110	15	125	46761	6376	53137	302.10KB	183.83KB	485.93KB	fineweb-2
ncl_Latn	110	13	123	87882	10386	98268	771.30KB	38.58KB	809.89KB	fineweb-2
pao_Latn	110	24	134	82160	17925	100085	564.15KB	173.33KB	737.48KB	fineweb-2
ctp_Latn	108	10	118	136266	12617	148883	911.09KB	74.69KB	985.78KB	fineweb-2
eve_Cyrl	108	22	130	47751	8288	56039	513.09KB	408.79KB	921.87KB	fineweb-2, mala
tgp_Latn	108	16	124	95058	14082	109140	590.86KB	93.22KB	684.07KB	fineweb-2
tif_Latn	108	9	117	164184	13682	177866	1.13MB	36.82KB	1.16MB	fineweb-2
usa_Latn	108	18	126	80286	13381	93667	883.82KB	145.25KB	1.00MB	fineweb-2
wsk_Latn	108	11	119	82748	8428	91176	549.33KB	45.99KB	595.32KB	fineweb-2
lad_Hebr	107	21	128	23880	4613	28493	206.32KB	69.67KB	275.98KB	fineweb-2, mala
kbm_Latn	106	14	120	144561	19092	163653	837.97KB	129.43KB	967.40KB	fineweb-2
kim_Cyrl	105	32	137	2459	749	3208	66.39KB	25.45KB	91.84KB	mala
kbc_Latn	104	9	113	72152	6244	78396	789.94KB	61.46KB	851.40KB	fineweb-2
kog_Latn	104	9	113	28259	2445	30704	284.21KB	74.69KB	358.91KB	fineweb-2
zos_Latn	104	8	112	62951	4842	67793	550.49KB	51.93KB	602.42KB	fineweb-2
gbm_Deva	104	15	119	74186	10700	84886	937.28KB	168.72KB	1.08MB	mala
abn_Latn	103	10	113	21350	2072	23422	229.83KB	34.41KB	264.24KB	fineweb-2
apb_Latn	103	17	120	110921	18307	129228	668.23KB	116.72KB	784.95KB	fineweb-2
mwp_Latn	103	13	116	89721	11324	101045	714.26KB	90.98KB	805.25KB	fineweb-2
okv_Latn	103	9	112	76563	6690	83253	481.82KB	39.29KB	521.11KB	fineweb-2
bsq_Latn	102	15	117	45746	6727	52473	386.71KB	85.70KB	472.41KB	fineweb-2
csw_Latn	102	16	118	39734	6185	45919	404.79KB	183.49KB	588.28KB	fineweb-2, mala
gdn_Latn	102	13	115	115989	14782	130771	777.23KB	99.44KB	876.67KB	fineweb-2
mih_Latn	102	12	114	54750	6441	61191	336.26KB	49.25KB	385.51KB	fineweb-2
qvc_Latn	102	12	114	67756	8319	76075	644.86KB	156.53KB	801.39KB	fineweb-2, mala
wbp_Latn	102	14	116	67363	9246	76609	801.87KB	95.37KB	897.24KB	fineweb-2
xsr_Deva	102	15	117	78264	11509	89773	1.33MB	164.64KB	1.49MB	fineweb-2
llg_Latn	101	10	111	106030	10544	116574	699.42KB	48.86KB	748.28KB	fineweb-2, mala
sgz_Latn	101	18	119	107020	19072	126092	687.01KB	67.68KB	754.69KB	fineweb-2
tgo_Latn	101	14	115	71823	9955	81778	540.97KB	81.15KB	622.12KB	fineweb-2
leh_Latn	100	12	112	16237	1948	18185	197.90KB	36.16KB	234.06KB	fineweb-2
roo_Latn	100	16	116	102935	16599	119534	747.23KB	109.02KB	856.26KB	fineweb-2, mala
tod_Latn	100	14	114	68888	9700	78588	494.66KB	70.30KB	564.96KB	fineweb-2, mala
zpm_Latn	100	15	115	142545	21381	163926	379.97KB	829.40KB	1.18MB	fineweb-2
eko_Latn	97	8	105	53450	4408	57858	481.00KB	33.02KB	514.02KB	fineweb-2
kyf_Latn	96	14	110	76621	11174	87795	510.27KB	92.50KB	602.77KB	fineweb-2
moh_Latn	96	17	113	56890	10074	66964	442.21KB	163.03KB	605.23KB	fineweb-2
nsu_Latn	96	16	112	97985	16330	114315	417.82KB	570.76KB	988.58KB	fineweb-2
gmv_Ethi	95	15	110	47661	7525	55186	572.21KB	87.06KB	659.27KB	fineweb-2
kck_Latn	95	26	121	21184	5797	26981	255.74KB	47.71KB	303.45KB	fineweb-2
pbb_Latn	95	8	103	32000	2694	34694	295.98KB	78.75KB	374.72KB	fineweb-2
xtd_Latn	95	11	106	63191	7316	70507	512.68KB	47.00KB	559.67KB	fineweb-2
mhx_Latn	94	15	109	216869	24308	241177	1.29MB	22.68KB	1.32MB	fineweb-2, mala
rng_Latn	94	9	103	70509	6750	77259	204.06KB	419.71KB	623.77KB	fineweb-2
dng_Cyrl	93	14	107	33373	5024	38397	428.48KB	53.74KB	482.22KB	fineweb-2
ino_Latn	93	15	108	91085	14820	105905	684.79KB	162.40KB	847.19KB	fineweb-2, mala
iou_Latn	93	10	103	79938	8595	88533	680.12KB	51.12KB	731.24KB	fineweb-2
ndh_Latn	93	10	103	48110	5173	53283	425.33KB	49.74KB	475.06KB	fineweb-2
cof_Latn	92	7	99	60153	4576	64729	495.45KB	71.94KB	567.39KB	fineweb-2
kje_Latn	92	7	99	79447	6044	85491	553.33KB	23.53KB	576.86KB	fineweb-2
omw_Latn	92	14	106	84932	12260	97192	675.14KB	90.43KB	765.58KB	fineweb-2, mala
cao_Latn	91	22	113	64703	15642	80345	505.83KB	73.44KB	579.28KB	fineweb-2
hot_Latn	91	12	103	68241	8998	77239	477.72KB	65.96KB	543.69KB	fineweb-2
tnk_Latn	91	15	106	83074	13693	96767	611.95KB	130.46KB	742.41KB	fineweb-2
cbr_Latn	90	15	105	66156	10329	76485	512.25KB	88.53KB	600.78KB	fineweb-2, mala
eri_Latn	90	28	118	45808	14251	60059	390.71KB	67.39KB	458.10KB	fineweb-2
naf_Latn	90	19	109	82340	17383	99723	614.50KB	172.33KB	786.83KB	fineweb-2
qxn_Latn	90	8	98	50315	4472	54787	553.07KB	51.31KB	604.39KB	fineweb-2
myx_Latn	89	14	103	26744	4239	30983	178.37KB	136.70KB	315.06KB	fineweb-2, mala
kmz_Latn	89	12	101	85364	11509	96873	373.62KB	277.85KB	651.48KB	mala
bvd_Latn	88	13	101	84175	12435	96610	486.05KB	59.50KB	545.55KB	fineweb-2
msk_Latn	88	8	96	71552	6504	78056	509.67KB	24.90KB	534.57KB	fineweb-2
mvp_Latn	88	21	109	67840	16189	84029	176.71KB	551.11KB	727.82KB	fineweb-2
nmf_Latn	88	11	99	48454	6056	54510	395.41KB	109.25KB	504.66KB	fineweb-2
tih_Latn	88	24	112	13651	3723	17374	146.52KB	81.85KB	228.37KB	fineweb-2
agt_Latn	87	18	105	43268	8952	52220	276.72KB	72.63KB	349.36KB	fineweb-2
lww_Latn	87	8	95	111490	10252	121742	683.36KB	60.29KB	743.65KB	fineweb-2
mnk_Latn	87	19	106	140247	30628	170875	772.21KB	230.27KB	1002.48KB	fineweb-2
mpm_Latn	87	16	103	98668	18146	116814	546.70KB	112.08KB	658.79KB	fineweb-2
clu_Latn	86	13	99	41465	6268	47733	307.03KB	44.93KB	351.96KB	fineweb-2
djr_Latn	86	6	92	160188	11175	171363	1.06MB	497.84KB	1.54MB	fineweb-2
wed_Latn	86	12	98	35123	4900	40023	269.87KB	43.28KB	313.15KB	fineweb-2
kbh_Latn	85	13	98	57497	8793	66290	606.08KB	89.12KB	695.20KB	fineweb-2
oke_Latn	85	21	106	17749	4385	22134	160.08KB	83.03KB	243.12KB	fineweb-2
ptp_Latn	85	7	92	96560	7952	104512	540.86KB	49.82KB	590.68KB	fineweb-2
amr_Latn	84	9	93	49509	5304	54813	571.92KB	68.71KB	640.63KB	fineweb-2
dak_Latn	84	33	117	273405	107409	380814	1.67MB	777.58KB	2.43MB	fineweb-2
nhk_Latn	83	21	104	13880	3512	17392	163.05KB	50.08KB	213.13KB	fineweb-2
nuy_Latn	83	9	92	47609	5162	52771	637.06KB	67.90KB	704.96KB	fineweb-2
huv_Latn	82	15	97	48487	8869	57356	393.16KB	82.53KB	475.69KB	fineweb-2
blw_Latn	81	18	99	64417	14315	78732	391.80KB	164.37KB	556.17KB	fineweb-2
nbq_Latn	81	15	96	83802	15519	99321	616.04KB	97.04KB	713.08KB	fineweb-2
ria_Latn	81	17	98	31485	6608	38093	182.16KB	111.02KB	293.18KB	fineweb-2
zar_Latn	81	17	98	66697	13998	80695	513.53KB	138.82KB	652.35KB	fineweb-2
bzd_Latn	80	12	92	59600	8940	68540	434.51KB	56.62KB	491.13KB	fineweb-2
cbi_Latn	80	7	87	73151	6400	79551	423.18KB	228.28KB	651.46KB	fineweb-2
cta_Latn	80	20	100	120700	30175	150875	819.37KB	248.72KB	1.04MB	fineweb-2
poi_Latn	80	12	92	45729	6859	52588	420.66KB	83.80KB	504.46KB	fineweb-2
ati_Latn	79	16	95	22155	4487	26642	178.51KB	55.03KB	233.55KB	fineweb-2
bps_Latn	79	12	91	53808	8173	61981	312.44KB	83.57KB	396.01KB	fineweb-2
gvn_Latn	79	8	87	35488	3593	39081	356.51KB	34.61KB	391.12KB	fineweb-2
cag_Latn	78	7	85	34677	3112	37789	294.22KB	32.24KB	326.46KB	fineweb-2
yva_Latn	78	17	95	35499	7737	43236	300.72KB	59.09KB	359.81KB	fineweb-2
ayp_Arab	77	12	89	179724	28009	207733	1.72MB	280.45KB	2.00MB	fineweb-2
kwf_Latn	77	15	92	82482	16068	98550	541.59KB	117.34KB	658.94KB	fineweb-2
lmk_Latn	77	16	93	100508	20884	121392	447.70KB	498.39KB	946.09KB	fineweb-2
sby_Latn	77	7	84	10543	958	11501	142.56KB	19.53KB	162.09KB	fineweb-2
bbb_Latn	76	16	92	70105	14758	84863	427.67KB	106.15KB	533.82KB	fineweb-2
cwe_Latn	76	13	89	46242	7909	54151	383.22KB	62.01KB	445.23KB	fineweb-2
gam_Latn	76	6	82	84589	6678	91267	473.90KB	45.68KB	519.59KB	fineweb-2
too_Latn	76	9	85	52562	6224	58786	519.34KB	79.24KB	598.58KB	fineweb-2
kdc_Latn	75	10	85	38667	5155	43822	319.07KB	49.27KB	368.34KB	fineweb-2
aoi_Latn	74	14	88	71415	13511	84926	887.48KB	166.91KB	1.03MB	fineweb-2
cri_Latn	74	28	102	17834	6748	24582	126.35KB	109.63KB	235.98KB	fineweb-2
tim_Latn	74	19	93	62747	16110	78857	499.25KB	133.79KB	633.04KB	fineweb-2
atd_Latn	73	18	91	40721	10041	50762	322.82KB	42.44KB	365.26KB	fineweb-2
cpb_Latn	73	13	86	27467	4891	32358	353.36KB	77.90KB	431.25KB	fineweb-2
kvn_Latn	73	9	82	42397	5227	47624	428.44KB	78.30KB	506.75KB	fineweb-2
lsi_Latn	73	24	97	42987	14461	57448	286.00KB	98.30KB	384.30KB	fineweb-2, mala
mbc_Latn	73	9	82	54276	6691	60967	484.54KB	73.73KB	558.27KB	fineweb-2
mfy_Latn	73	9	82	17697	2181	19878	176.15KB	31.70KB	207.85KB	fineweb-2
upv_Latn	73	8	81	56445	6185	62630	382.69KB	36.17KB	418.86KB	fineweb-2
xed_Latn	73	14	87	55240	10594	65834	348.56KB	74.74KB	423.29KB	fineweb-2
ese_Latn	72	14	86	79108	15382	94490	625.30KB	122.25KB	747.55KB	fineweb-2
geb_Latn	72	17	89	80678	19049	99727	508.33KB	155.27KB	663.60KB	fineweb-2
hnn_Latn	72	12	84	47214	7869	55083	362.28KB	59.37KB	421.65KB	fineweb-2
kqf_Latn	72	15	87	22555	4698	27253	188.83KB	42.27KB	231.10KB	fineweb-2
lcm_Latn	72	19	91	76490	20184	96674	446.43KB	128.90KB	575.33KB	fineweb-2
mbh_Latn	72	18	90	90796	22699	113495	491.16KB	133.04KB	624.20KB	fineweb-2
mwn_Latn	72	8	80	31340	3482	34822	168.93KB	136.66KB	305.59KB	fineweb-2
tku_Latn	72	9	81	33647	4205	37852	360.30KB	72.92KB	433.22KB	fineweb-2
btd_Latn	71	9	80	36052	4570	40622	145.42KB	197.24KB	342.66KB	fineweb-2
gdg_Latn	71	15	86	38884	8215	47099	311.50KB	134.13KB	445.62KB	fineweb-2
nuj_Latn	71	19	90	29023	7766	36789	274.08KB	71.85KB	345.93KB	fineweb-2
rkb_Latn	71	16	87	64268	14483	78751	558.09KB	132.44KB	690.53KB	fineweb-2
rml_Latn	71	12	83	43608	7370	50978	326.88KB	74.14KB	401.01KB	fineweb-2
zas_Latn	71	16	87	47401	10682	58083	368.55KB	66.34KB	434.89KB	fineweb-2
iws_Latn	70	16	86	79349	18137	97486	643.68KB	126.74KB	770.42KB	fineweb-2
snn_Latn	70	11	81	60456	9500	69956	517.81KB	99.65KB	617.45KB	fineweb-2
myk_Latn	69	15	84	54457	9432	63889	302.40KB	59.90KB	362.30KB	fineweb-2, mala
tar_Latn	69	12	81	14110	2453	16563	140.57KB	33.18KB	173.74KB	fineweb-2
ttc_Latn	69	9	78	50722	6616	57338	337.06KB	63.46KB	400.52KB	fineweb-2
tue_Latn	69	11	80	33653	5365	39018	398.63KB	55.61KB	454.24KB	fineweb-2
urt_Latn	69	13	82	60669	11430	72099	413.91KB	85.09KB	499.00KB	fineweb-2
apu_Latn	68	17	85	39845	9961	49806	412.05KB	104.55KB	516.60KB	fineweb-2
luc_Latn	68	13	81	64796	12387	77183	522.56KB	101.10KB	623.66KB	fineweb-2
mio_Latn	68	13	81	58328	11150	69478	328.69KB	65.60KB	394.29KB	fineweb-2
mto_Latn	68	20	88	52615	15475	68090	488.74KB	186.77KB	675.50KB	fineweb-2
mwc_Latn	68	6	74	34434	3038	37472	258.14KB	20.58KB	278.72KB	fineweb-2
nou_Latn	68	17	85	74917	18729	93646	441.29KB	121.69KB	562.98KB	fineweb-2
kjg_Laoo	68	13	81	54111	10344	64455	842.09KB	146.58KB	988.67KB	mala
adz_Latn	67	11	78	62397	10244	72641	408.74KB	66.66KB	475.40KB	fineweb-2
apz_Latn	67	19	86	68690	19479	88169	621.91KB	184.48KB	806.38KB	fineweb-2
kyg_Latn	66	12	78	48912	8893	57805	443.73KB	78.88KB	522.61KB	fineweb-2
nyf_Latn	66	9	75	24397	3326	27723	167.53KB	82.61KB	250.13KB	fineweb-2
arl_Latn	65	18	83	46090	12763	58853	474.12KB	137.69KB	611.81KB	fineweb-2
beq_Latn	65	19	84	41899	12247	54146	324.62KB	75.06KB	399.68KB	fineweb-2
cpc_Latn	65	9	74	27220	3769	30989	335.18KB	66.66KB	401.83KB	fineweb-2
ncu_Latn	65	18	83	51596	14288	65884	405.17KB	96.28KB	501.46KB	fineweb-2
yrb_Latn	65	11	76	68840	11649	80489	474.52KB	71.79KB	546.31KB	fineweb-2
xmm_Latn	64	21	85	13279	4180	17459	124.09KB	49.18KB	173.28KB	fineweb-2, mala
ava_Latn	64	11	75	13812	2374	16186	140.79KB	67.22KB	208.01KB	mala
bkd_Latn	63	11	74	38820	6778	45598	253.76KB	50.33KB	304.08KB	fineweb-2
fub_Latn	63	12	75	25398	4713	30111	170.58KB	81.63KB	252.21KB	fineweb-2, mala
nko_Latn	63	7	70	37707	4189	41896	332.00KB	33.49KB	365.49KB	fineweb-2
rel_Latn	63	15	78	19387	4580	23967	151.12KB	114.10KB	265.22KB	fineweb-2, mala
toc_Latn	63	9	72	47656	6808	54464	531.84KB	80.38KB	612.21KB	fineweb-2
kqc_Latn	62	6	68	48656	4708	53364	320.18KB	17.93KB	338.11KB	fineweb-2
nsm_Latn	62	13	75	17247	3616	20863	189.14KB	38.42KB	227.56KB	fineweb-2
tew_Latn	62	16	78	38595	9960	48555	439.50KB	137.80KB	577.30KB	fineweb-2
wmt_Latn	62	10	72	20600	3322	23922	228.17KB	63.36KB	291.52KB	fineweb-2
gan_Latn	62	12	74	5101	987	6088	69.58KB	47.60KB	117.19KB	mala
apw_Latn	61	12	73	32182	6331	38513	363.90KB	68.22KB	432.12KB	fineweb-2
cut_Latn	61	19	80	46148	14374	60522	299.50KB	106.41KB	405.92KB	fineweb-2
kaq_Latn	61	5	66	32657	2676	35333	279.40KB	28.33KB	307.73KB	fineweb-2
faa_Latn	60	12	72	46378	9275	55653	385.83KB	98.06KB	483.89KB	fineweb-2
sxb_Latn	60	10	70	40722	6793	47515	340.72KB	38.58KB	379.30KB	fineweb-2, mala
amk_Latn	59	9	68	44829	6838	51667	324.59KB	43.82KB	368.40KB	fineweb-2
kgk_Latn	59	5	64	35961	3047	39008	302.83KB	25.02KB	327.84KB	fineweb-2
lif_Limb	59	15	74	32316	8216	40532	737.06KB	224.41KB	961.47KB	fineweb-2
ssd_Latn	59	11	70	48320	9008	57328	329.80KB	63.88KB	393.68KB	fineweb-2
bwd_Latn	58	7	65	27811	3356	31167	265.76KB	14.70KB	280.45KB	fineweb-2
hch_Latn	58	10	68	18612	3208	21820	245.46KB	32.44KB	277.91KB	fineweb-2
ace_Arab	57	15	72	38419	10110	48529	142.57KB	67.86KB	210.44KB	fineweb-2
bhg_Latn	57	6	63	51074	5376	56450	371.07KB	18.28KB	389.35KB	fineweb-2
lif_Deva	57	8	65	36964	5188	42152	928.33KB	247.50KB	1.15MB	fineweb-2
not_Latn	57	12	69	26147	5504	31651	285.44KB	87.22KB	372.66KB	fineweb-2
tna_Latn	57	9	66	45486	7182	52668	320.46KB	61.70KB	382.16KB	fineweb-2
zxx_Latn	57	13	70	62713	14303	77016	268.20KB	479.49KB	747.69KB	mala
wnu_Latn	56	6	62	56292	6031	62323	342.65KB	49.59KB	392.24KB	fineweb-2
aui_Latn	55	12	67	15654	3415	19069	137.60KB	47.93KB	185.53KB	fineweb-2
kkl_Latn	55	8	63	47378	6891	54269	354.21KB	74.59KB	428.80KB	fineweb-2
ood_Latn	55	17	72	13958	4314	18272	115.13KB	49.30KB	164.43KB	fineweb-2
zpt_Latn	55	11	66	42390	8478	50868	274.17KB	59.47KB	333.64KB	fineweb-2
nkf_Latn	54	23	77	26533	11301	37834	185.78KB	107.82KB	293.60KB	fineweb-2
zpo_Latn	54	19	73	44574	15683	60257	193.00KB	160.85KB	353.85KB	fineweb-2
ajg_Latn	53	17	70	19057	6112	25169	99.08KB	99.80KB	198.88KB	fineweb-2
crk_Latn	53	11	64	36964	7671	44635	323.01KB	118.93KB	441.94KB	fineweb-2
stn_Latn	53	12	65	5844	1323	7167	86.89KB	27.08KB	113.97KB	fineweb-2
tuf_Latn	53	9	62	37665	6396	44061	285.09KB	44.51KB	329.60KB	fineweb-2
zao_Latn	53	10	63	35070	6616	41686	225.39KB	29.51KB	254.90KB	fineweb-2
avt_Latn	52	14	66	56216	15135	71351	380.01KB	86.27KB	466.28KB	fineweb-2
azz_Latn	52	21	73	34775	14044	48819	342.81KB	81.35KB	424.16KB	fineweb-2
huu_Latn	52	10	62	31390	6036	37426	273.19KB	77.63KB	350.82KB	fineweb-2
wba_Latn	52	7	59	7468	1005	8473	82.30KB	23.63KB	105.93KB	fineweb-2
yaq_Latn	52	8	60	48951	7530	56481	364.34KB	60.51KB	424.84KB	fineweb-2
mnw_Latn	52	14	66	103647	27904	131551	658.08KB	519.94KB	1.15MB	mala
amf_Latn	51	10	61	16224	3181	19405	157.22KB	39.36KB	196.58KB	fineweb-2
ifu_Latn	51	12	63	37597	8846	46443	285.03KB	26.71KB	311.74KB	fineweb-2
ige_Latn	51	8	59	32289	5065	37354	283.75KB	27.66KB	311.41KB	fineweb-2
qxo_Latn	51	6	57	19915	2342	22257	237.71KB	26.87KB	264.58KB	fineweb-2
lez_Latn	51	8	59	7100	1113	8213	89.94KB	27.65KB	117.59KB	mala
bla_Latn	50	6	56	17068	2048	19116	197.32KB	76.55KB	273.87KB	fineweb-2
lln_Latn	50	10	60	49499	9899	59398	273.43KB	110.37KB	383.80KB	fineweb-2
nhx_Latn	50	9	59	19834	3628	23462	210.02KB	30.04KB	240.06KB	fineweb-2, mala
rgu_Latn	50	7	57	45030	6304	51334	288.77KB	53.41KB	342.18KB	fineweb-2
trq_Latn	50	12	62	56299	13511	69810	388.40KB	114.04KB	502.43KB	fineweb-2
njn_Latn	49	10	59	11896	2427	14323	112.84KB	27.34KB	140.18KB	fineweb-2
syb_Latn	49	9	58	34486	6334	40820	281.93KB	31.22KB	313.14KB	fineweb-2
mck_Latn	48	16	64	24832	8277	33109	177.09KB	128.25KB	305.34KB	fineweb-2
mit_Latn	48	12	60	31549	7887	39436	310.88KB	52.16KB	363.04KB	fineweb-2
pot_Latn	48	7	55	55509	8095	63604	286.09KB	291.47KB	577.55KB	fineweb-2
sey_Latn	48	8	56	26895	4482	31377	269.32KB	58.50KB	327.81KB	fineweb-2
ake_Latn	47	12	59	31721	8099	39820	262.47KB	52.82KB	315.29KB	fineweb-2
cbt_Latn	47	5	52	28068	2986	31054	322.66KB	35.99KB	358.65KB	fineweb-2
hra_Latn	47	7	54	2533	377	2910	45.13KB	7.14KB	52.27KB	fineweb-2
kdl_Latn	47	13	60	43108	11923	55031	234.13KB	77.67KB	311.79KB	fineweb-2
law_Latn	47	7	54	21760	3240	25000	224.18KB	32.26KB	256.44KB	fineweb-2
lbj_Tibt	47	5	52	25342	2695	28037	650.87KB	33.35KB	684.22KB	fineweb-2
mjc_Latn	47	9	56	37205	7124	44329	239.15KB	35.12KB	274.27KB	fineweb-2
pov_Latn	47	9	56	14077	2695	16772	106.35KB	30.71KB	137.06KB	fineweb-2
zdj_Latn	47	14	61	11156	3323	14479	102.44KB	36.18KB	138.62KB	fineweb-2
daf_Latn	46	7	53	13790	2098	15888	125.42KB	15.45KB	140.87KB	fineweb-2
nif_Latn	46	10	56	54758	11904	66662	462.11KB	92.60KB	554.71KB	fineweb-2
prf_Latn	46	9	55	31339	6131	37470	202.99KB	50.91KB	253.90KB	fineweb-2
zaa_Latn	46	13	59	46176	13050	59226	297.81KB	93.79KB	391.60KB	fineweb-2
atg_Latn	45	8	53	34149	6070	40219	218.43KB	45.19KB	263.62KB	fineweb-2
bco_Latn	45	15	60	36719	12239	48958	382.21KB	142.45KB	524.66KB	fineweb-2
bxh_Latn	45	10	55	14355	3190	17545	112.46KB	34.34KB	146.80KB	fineweb-2
fan_Latn	45	16	61	9704	3450	13154	61.00KB	54.22KB	115.22KB	fineweb-2
far_Latn	45	5	50	45720	5080	50800	255.21KB	32.50KB	287.71KB	fineweb-2
llb_Latn	45	22	67	13877	6784	20661	117.41KB	99.09KB	216.50KB	fineweb-2
nki_Latn	45	10	55	96166	21370	117536	133.65KB	908.38KB	1.02MB	fineweb-2
sil_Latn	45	17	62	28234	10666	38900	191.87KB	49.38KB	241.25KB	fineweb-2
drg_Latn	44	9	53	19570	4002	23572	139.67KB	47.33KB	186.99KB	fineweb-2
etr_Latn	44	12	56	40289	10988	51277	314.98KB	81.07KB	396.05KB	fineweb-2
ikw_Latn	44	8	52	26420	4803	31223	183.55KB	46.34KB	229.90KB	fineweb-2
miy_Latn	44	12	56	39420	10977	50397	350.62KB	88.87KB	439.49KB	fineweb-2, mala
mna_Latn	44	15	59	44217	15073	59290	278.67KB	91.16KB	369.83KB	fineweb-2
ngp_Latn	44	9	53	25353	5185	30538	200.72KB	42.72KB	243.44KB	fineweb-2
plw_Latn	44	10	54	33003	7500	40503	227.06KB	63.43KB	290.49KB	fineweb-2
qxl_Latn	44	12	56	19826	5407	25233	202.52KB	58.40KB	260.92KB	fineweb-2
yad_Latn	44	8	52	20446	3717	24163	239.49KB	47.21KB	286.70KB	fineweb-2
kqe_Latn	43	9	52	35106	7347	42453	238.00KB	47.21KB	285.20KB	fineweb-2
kyu_Latn	43	8	51	15200	2828	18028	107.61KB	57.48KB	165.09KB	fineweb-2
loq_Latn	43	9	52	32525	6807	39332	262.60KB	60.67KB	323.27KB	fineweb-2
mcb_Latn	43	8	51	15690	2919	18609	225.16KB	36.51KB	261.68KB	fineweb-2
mib_Latn	43	6	49	37816	5276	43092	228.09KB	51.11KB	279.20KB	fineweb-2
mlu_Latn	43	5	48	16365	1903	18268	113.02KB	20.79KB	133.81KB	fineweb-2
cnl_Latn	42	8	50	29651	5647	35298	320.48KB	51.42KB	371.90KB	fineweb-2
dhm_Latn	42	10	52	30447	7249	37696	218.81KB	55.76KB	274.57KB	fineweb-2
dru_Latn	42	11	53	10266	2688	12954	129.35KB	34.84KB	164.18KB	fineweb-2
maj_Latn	42	7	49	26063	4343	30406	217.63KB	45.56KB	263.19KB	fineweb-2
miz_Latn	42	8	50	36837	7016	43853	232.32KB	39.90KB	272.21KB	fineweb-2
mza_Latn	42	7	49	44566	7427	51993	232.88KB	67.48KB	300.36KB	fineweb-2
tav_Latn	42	5	47	46914	5585	52499	274.71KB	125.40KB	400.11KB	fineweb-2
tks_Arab	42	9	51	6893	1477	8370	61.87KB	31.48KB	93.34KB	mala
xsb_Latn	41	11	52	5993	1607	7600	70.92KB	36.25KB	107.17KB	fineweb-2
yka_Latn	41	7	48	35146	6000	41146	206.79KB	103.97KB	310.76KB	fineweb-2
anv_Latn	40	7	47	31004	5425	36429	213.11KB	61.61KB	274.72KB	fineweb-2
mcf_Latn	40	4	44	25059	2505	27564	237.10KB	29.56KB	266.66KB	fineweb-2
mir_Latn	40	7	47	26331	4608	30939	270.34KB	60.37KB	330.71KB	fineweb-2
smt_Latn	40	9	49	36642	6776	43418	181.49KB	131.55KB	313.04KB	fineweb-2, mala
wiu_Latn	40	17	57	30570	12992	43562	264.87KB	57.99KB	322.86KB	fineweb-2
mks_Latn	39	16	55	34711	14240	48951	246.53KB	111.92KB	358.45KB	fineweb-2
nhr_Latn	39	10	49	33356	8553	41909	211.63KB	67.03KB	278.66KB	fineweb-2
mgm_Latn	38	11	49	7276	1879	9155	58.87KB	19.95KB	78.82KB	fineweb-2, mala
boa_Latn	37	15	52	14652	5940	20592	195.55KB	75.39KB	270.94KB	fineweb-2
ign_Latn	37	10	47	18390	4970	23360	209.07KB	42.67KB	251.73KB	fineweb-2
yaa_Latn	37	6	43	24870	4032	28902	183.98KB	49.16KB	233.14KB	fineweb-2
apn_Latn	36	9	45	53025	13256	66281	329.92KB	73.51KB	403.43KB	fineweb-2
con_Latn	36	6	42	17212	2868	20080	165.84KB	38.47KB	204.31KB	fineweb-2
gyr_Latn	36	10	46	22289	6191	28480	167.71KB	62.85KB	230.55KB	fineweb-2
yan_Latn	36	6	42	17724	2954	20678	141.84KB	25.65KB	167.49KB	fineweb-2
zpi_Latn	36	18	54	18498	9249	27747	134.66KB	50.30KB	184.95KB	fineweb-2
ddg_Latn	35	20	55	10993	5694	16687	62.15KB	83.65KB	145.81KB	fineweb-2, mala
klt_Latn	35	11	46	31505	9901	41406	208.26KB	129.36KB	337.62KB	fineweb-2
twx_Latn	35	9	44	16817	4324	21141	114.40KB	70.62KB	185.02KB	fineweb-2
zae_Latn	35	8	43	26197	5988	32185	173.92KB	45.95KB	219.87KB	fineweb-2
cjk_Latn	34	10	44	9526	2802	12328	82.41KB	39.78KB	122.20KB	fineweb-2
dln_Latn	34	10	44	33224	9704	42928	168.07KB	85.93KB	254.00KB	fineweb-2, mala
ish_Latn	34	14	48	19516	8036	27552	132.53KB	68.26KB	200.79KB	fineweb-2
lcp_Thai	34	8	42	33035	7773	40808	381.94KB	100.21KB	482.15KB	fineweb-2
mbj_Latn	34	8	42	48190	11339	59529	194.20KB	153.07KB	347.27KB	fineweb-2
wbm_Latn	34	11	45	34761	11572	46333	90.02KB	177.66KB	267.68KB	fineweb-2, mala
aer_Latn	33	5	38	25743	3900	29643	287.85KB	20.71KB	308.56KB	fineweb-2
gah_Latn	33	9	42	21383	5831	27214	176.84KB	47.55KB	224.39KB	fineweb-2
ses_Latn	33	7	40	10051	2132	12183	80.61KB	14.47KB	95.08KB	fineweb-2
sgh_Cyrl	33	8	41	12015	2912	14927	108.39KB	86.68KB	195.08KB	fineweb-2
bdq_Latn	32	10	42	10290	3215	13505	96.12KB	21.05KB	117.17KB	fineweb-2
dnw_Latn	32	10	42	13909	4419	18328	81.25KB	69.55KB	150.80KB	fineweb-2, mala
gwi_Latn	32	8	40	21245	5311	26556	205.15KB	58.72KB	263.87KB	fineweb-2
lbk_Latn	32	8	40	22732	5683	28415	167.70KB	34.34KB	202.04KB	fineweb-2
ldi_Latn	32	11	43	12072	4150	16222	80.98KB	49.26KB	130.24KB	fineweb-2
pmf_Latn	32	4	36	19168	2434	21602	98.54KB	55.07KB	153.61KB	fineweb-2, mala
sdq_Latn	32	9	41	9578	2694	12272	65.66KB	50.63KB	116.28KB	fineweb-2
ycn_Latn	32	8	40	27219	6804	34023	218.71KB	93.73KB	312.44KB	fineweb-2
amx_Latn	31	7	38	26430	5968	32398	246.40KB	58.64KB	305.04KB	fineweb-2
hix_Latn	31	8	39	26563	6854	33417	211.06KB	44.39KB	255.44KB	fineweb-2
jic_Latn	31	6	37	25306	4898	30204	165.58KB	39.42KB	205.00KB	fineweb-2
pio_Latn	31	5	36	21522	3471	24993	241.29KB	34.72KB	276.01KB	fineweb-2
poy_Latn	31	12	43	17941	6944	24885	158.12KB	53.52KB	211.64KB	fineweb-2
mlh_Latn	30	9	39	21857	6557	28414	187.45KB	41.86KB	229.30KB	fineweb-2
xog_Latn	30	12	42	5568	2328	7896	67.07KB	24.54KB	91.61KB	fineweb-2, mala
box_Latn	29	6	35	25180	5209	30389	146.49KB	34.94KB	181.44KB	fineweb-2
neb_Latn	29	9	38	11538	3581	15119	107.63KB	24.95KB	132.58KB	fineweb-2
xtm_Latn	29	8	37	27812	7672	35484	165.17KB	25.67KB	190.84KB	fineweb-2
kau_Latn	29	11	40	8797	3336	12133	75.87KB	30.74KB	106.61KB	mala
cly_Latn	28	10	38	4460	1592	6052	53.78KB	17.09KB	70.87KB	fineweb-2
cux_Latn	28	7	35	24412	6103	30515	191.78KB	57.70KB	249.48KB	fineweb-2
dsh_Latn	28	10	38	25897	9249	35146	142.27KB	130.99KB	273.26KB	fineweb-2
lai_Latn	28	4	32	7264	1037	8301	63.87KB	20.99KB	84.85KB	fineweb-2
vag_Latn	28	5	33	8627	1540	10167	48.66KB	26.83KB	75.49KB	fineweb-2
cme_Latn	27	6	33	16265	3614	19879	132.95KB	17.20KB	150.15KB	fineweb-2
cot_Latn	27	6	33	10700	2377	13077	170.65KB	38.88KB	209.53KB	fineweb-2
for_Latn	27	7	34	18650	4835	23485	161.87KB	39.79KB	201.66KB	fineweb-2
gnb_Latn	27	11	38	19076	7771	26847	141.61KB	31.47KB	173.09KB	fineweb-2
kgr_Latn	27	9	36	19875	6625	26500	98.84KB	62.61KB	161.45KB	fineweb-2
ksc_Latn	27	5	32	15031	2783	17814	124.76KB	19.54KB	144.29KB	fineweb-2
zsr_Latn	27	7	34	15597	4043	19640	135.30KB	40.14KB	175.44KB	fineweb-2
pce_Mymr	27	7	34	10599	2748	13347	121.10KB	51.90KB	173.00KB	mala
tll_Latn	27	10	37	44096	16331	60427	306.87KB	141.75KB	448.62KB	mala
akh_Latn	26	9	35	49526	17143	66669	291.54KB	79.22KB	370.76KB	fineweb-2
anm_Latn	26	4	30	2726	419	3145	41.19KB	15.60KB	56.79KB	fineweb-2
apt_Latn	26	10	36	5173	1989	7162	51.50KB	18.37KB	69.86KB	fineweb-2
auc_Latn	26	12	38	23236	10724	33960	198.15KB	119.76KB	317.91KB	fineweb-2
bsp_Latn	26	8	34	15461	4757	20218	117.93KB	42.18KB	160.12KB	fineweb-2
ivb_Latn	26	5	31	3574	687	4261	38.75KB	15.26KB	54.01KB	fineweb-2
mrq_Latn	26	9	35	19661	6806	26467	62.72KB	90.26KB	152.98KB	fineweb-2
srq_Latn	26	7	33	17031	4585	21616	120.30KB	32.55KB	152.85KB	fineweb-2
ttj_Latn	26	6	32	13528	3121	16649	98.17KB	48.73KB	146.90KB	fineweb-2
uri_Latn	26	8	34	10633	3271	13904	40.55KB	57.87KB	98.42KB	fineweb-2
raj_Deva	26	6	32	44785	10335	55120	322.32KB	380.64KB	702.96KB	mala
ttt_Cyrl	26	12	38	1284	592	1876	19.26KB	16.63KB	35.89KB	mala
kmy_Latn	25	7	32	15919	4457	20376	111.35KB	29.15KB	140.50KB	fineweb-2
spm_Latn	25	7	32	26344	7376	33720	220.93KB	63.79KB	284.73KB	fineweb-2
cpu_Latn	24	8	32	15192	5064	20256	115.08KB	138.13KB	253.21KB	fineweb-2
cub_Latn	24	11	35	15107	6924	22031	163.82KB	49.85KB	213.67KB	fineweb-2
esi_Latn	24	5	29	8705	1813	10518	80.65KB	55.23KB	135.88KB	fineweb-2
kny_Latn	24	10	34	23827	9927	33754	94.51KB	142.59KB	237.11KB	fineweb-2
kxw_Latn	24	9	33	18738	7026	25764	141.64KB	63.95KB	205.60KB	fineweb-2
mig_Latn	24	15	39	14913	9320	24233	132.27KB	93.10KB	225.37KB	fineweb-2
tro_Latn	24	8	32	5226	1742	6968	51.55KB	30.63KB	82.18KB	fineweb-2
waj_Latn	24	8	32	22549	7516	30065	191.01KB	60.84KB	251.85KB	fineweb-2
whk_Latn	24	7	31	32090	9359	41449	94.74KB	141.28KB	236.02KB	fineweb-2
zsm_Arab	24	7	31	3512	1024	4536	58.84KB	15.60KB	74.44KB	fineweb-2
aph_Deva	24	7	31	7683	2240	9923	188.04KB	57.19KB	245.23KB	mala
bug_Bugi	24	6	30	1708	427	2135	29.40KB	14.27KB	43.68KB	mala
ian_Latn	23	8	31	21873	7608	29481	158.57KB	57.42KB	215.99KB	fineweb-2
lwo_Latn	23	5	28	18461	4013	22474	111.57KB	26.64KB	138.22KB	fineweb-2
mny_Latn	23	7	30	2291	697	2988	31.90KB	16.30KB	48.21KB	fineweb-2
omb_Latn	23	10	33	10398	4521	14919	51.40KB	66.04KB	117.44KB	fineweb-2
ruf_Latn	23	9	32	9370	3666	13036	81.89KB	36.32KB	118.21KB	fineweb-2
zav_Latn	23	5	28	16296	3542	19838	138.73KB	35.43KB	174.16KB	fineweb-2
hav_Latn	22	2	24	2458	223	2681	34.84KB	3.39KB	38.23KB	fineweb-2
kmm_Latn	22	9	31	19400	7936	27336	56.34KB	144.53KB	200.87KB	fineweb-2
maw_Latn	22	9	31	6779	2773	9552	38.46KB	34.89KB	73.35KB	fineweb-2
mhi_Latn	22	7	29	11184	3558	14742	53.13KB	40.27KB	93.40KB	fineweb-2
mrg_Latn	22	6	28	17568	4791	22359	79.15KB	116.27KB	195.42KB	fineweb-2
nnp_Latn	22	10	32	4050	1841	5891	36.82KB	33.36KB	70.18KB	fineweb-2
tuv_Latn	22	9	31	5471	2322	7793	42.85KB	34.72KB	77.57KB	fineweb-2, mala
trp_Latn	22	8	30	26116	9497	35613	68.20KB	182.67KB	250.87KB	mala
yrk_Latn	22	14	36	323	206	529	22.37KB	12.12KB	34.50KB	mala
bwu_Latn	21	7	28	11899	3966	15865	81.20KB	13.80KB	95.00KB	fineweb-2
ebk_Latn	21	3	24	16160	2308	18468	117.82KB	12.81KB	130.63KB	fineweb-2
fue_Latn	21	5	26	11670	2778	14448	94.82KB	18.95KB	113.77KB	fineweb-2
fuq_Latn	21	6	27	152343	43526	195869	1.48MB	28.88KB	1.50MB	fineweb-2
kzf_Latn	21	10	31	3187	1517	4704	32.66KB	25.16KB	57.82KB	fineweb-2
old_Latn	21	7	28	6631	2210	8841	67.89KB	12.21KB	80.09KB	fineweb-2
gqr_Latn	20	5	25	15552	3888	19440	110.44KB	19.49KB	129.93KB	fineweb-2
kub_Latn	20	5	25	35127	8781	43908	93.28KB	158.98KB	252.25KB	fineweb-2
muy_Latn	20	4	24	18755	3751	22506	109.22KB	37.67KB	146.88KB	fineweb-2
tbl_Latn	20	7	27	16569	5799	22368	85.94KB	33.02KB	118.96KB	fineweb-2
inh_Latn	20	15	35	597	448	1045	16.14KB	14.73KB	30.87KB	mala
snk_Latn	20	8	28	5464	2185	7649	29.62KB	20.04KB	49.66KB	mala
tpu_Khmr	20	7	27	8581	3003	11584	80.82KB	99.94KB	180.76KB	mala
are_Latn	19	10	29	8581	4516	13097	95.10KB	43.91KB	139.01KB	fineweb-2
arp_Latn	19	2	21	10868	1144	12012	138.41KB	8.54KB	146.95KB	fineweb-2
blt_Latn	19	6	25	29718	9384	39102	140.59KB	194.14KB	334.73KB	fineweb-2
daa_Latn	19	8	27	11530	4855	16385	77.16KB	40.89KB	118.04KB	fineweb-2
kqo_Latn	19	5	24	2720	716	3436	33.32KB	12.69KB	46.02KB	fineweb-2
ksp_Latn	19	4	23	14148	2978	17126	90.67KB	12.60KB	103.27KB	fineweb-2
nla_Latn	19	4	23	3200	595	3795	28.05KB	9.76KB	37.81KB	fineweb-2, mala
snw_Latn	19	5	24	12779	3363	16142	84.87KB	25.76KB	110.63KB	fineweb-2
wib_Latn	19	5	24	14595	3840	18435	100.60KB	30.81KB	131.41KB	fineweb-2
shn_Latn	19	5	24	12065	3175	15240	62.07KB	75.71KB	137.78KB	mala
bvc_Latn	18	3	21	1880	313	2193	27.97KB	8.61KB	36.58KB	fineweb-2
mog_Latn	18	5	23	5118	1421	6539	30.96KB	27.12KB	58.08KB	fineweb-2
njb_Latn	18	6	24	11492	3830	15322	103.34KB	21.52KB	124.86KB	fineweb-2
obo_Latn	18	11	29	6714	4103	10817	39.88KB	47.72KB	87.60KB	fineweb-2
tmd_Latn	18	8	26	15521	6898	22419	105.53KB	57.56KB	163.08KB	fineweb-2
hbb_Latn	18	7	25	8444	3284	11728	51.89KB	20.92KB	72.82KB	mala
khb_Talu	18	9	27	1514	757	2271	19.81KB	11.72KB	31.53KB	mala
kei_Latn	17	5	22	17760	5223	22983	51.24KB	74.69KB	125.93KB	fineweb-2
kmk_Latn	17	7	24	8872	3653	12525	72.01KB	26.34KB	98.35KB	fineweb-2
lip_Latn	17	6	23	11543	4074	15617	78.05KB	30.92KB	108.97KB	fineweb-2
lud_Latn	17	10	27	32700	19235	51935	173.07KB	195.06KB	368.13KB	fineweb-2
nmo_Latn	17	3	20	11960	2110	14070	107.69KB	8.92KB	116.61KB	fineweb-2
pss_Latn	17	6	23	2658	938	3596	26.39KB	11.93KB	38.32KB	fineweb-2
tpp_Latn	17	8	25	7748	3646	11394	72.15KB	34.90KB	107.05KB	fineweb-2
zad_Latn	17	6	23	7563	2669	10232	72.70KB	13.17KB	85.87KB	fineweb-2
krr_Khmr	17	8	25	13462	6335	19797	160.77KB	121.24KB	282.01KB	mala
zxx_Zyyy	17	8	25	2265	1066	3331	24.91KB	36.64KB	61.55KB	mala
adi_Latn	16	5	21	5011	1565	6576	46.82KB	15.57KB	62.38KB	fineweb-2
alj_Latn	16	8	24	5053	2526	7579	25.50KB	43.88KB	69.38KB	fineweb-2
any_Latn	16	5	21	14114	4410	18524	99.27KB	24.63KB	123.90KB	fineweb-2
biu_Latn	16	5	21	6045	1889	7934	40.15KB	24.12KB	64.27KB	fineweb-2
dug_Latn	16	4	20	6346	1548	7894	52.88KB	14.50KB	67.38KB	fineweb-2, mala
kam_Latn	16	4	20	696	174	870	7.49KB	5.65KB	13.14KB	fineweb-2, mala
mim_Latn	16	6	22	17968	6738	24706	137.65KB	41.75KB	179.40KB	fineweb-2
nlg_Latn	16	11	27	11104	7634	18738	65.35KB	56.98KB	122.33KB	fineweb-2
oto_Latn	16	3	19	8716	1634	10350	34.92KB	39.64KB	74.56KB	fineweb-2
qva_Latn	16	4	20	7733	1933	9666	86.22KB	21.91KB	108.12KB	fineweb-2
rhg_Latn	16	4	20	5327	1331	6658	49.66KB	11.45KB	61.11KB	fineweb-2
ttq_Latn	16	5	21	75805	23689	99494	55.34KB	830.49KB	885.83KB	fineweb-2
uth_Latn	16	4	20	12107	3026	15133	93.65KB	20.48KB	114.13KB	fineweb-2
lbe_Latn	16	6	22	786	294	1080	11.84KB	11.10KB	22.94KB	mala
syr_Latn	16	4	20	6143	1535	7678	60.02KB	21.17KB	81.19KB	mala
awi_Latn	15	7	22	2268	1058	3326	30.01KB	12.87KB	42.87KB	fineweb-2
frd_Latn	15	7	22	10147	4735	14882	74.60KB	22.65KB	97.25KB	fineweb-2
goa_Latn	15	9	24	2645	1587	4232	29.77KB	14.47KB	44.24KB	fineweb-2
ntp_Latn	15	4	19	6977	1860	8837	71.42KB	22.19KB	93.61KB	fineweb-2
var_Latn	15	10	25	9950	6633	16583	93.88KB	66.59KB	160.46KB	fineweb-2
yrl_Latn	15	3	18	2908	581	3489	33.24KB	6.07KB	39.31KB	fineweb-2
zpg_Latn	15	5	20	1611	537	2148	20.95KB	7.76KB	28.71KB	fineweb-2
got_Latn	15	7	22	63449	29609	93058	85.04KB	187.93KB	272.97KB	mala
cjv_Latn	14	5	19	16859	6021	22880	108.04KB	31.78KB	139.82KB	fineweb-2
dks_Latn	14	4	18	6266	1790	8056	53.05KB	10.50KB	63.55KB	fineweb-2
ghs_Latn	14	7	21	14536	7268	21804	104.11KB	55.92KB	160.03KB	fineweb-2
kgf_Latn	14	5	19	9431	3368	12799	79.81KB	25.87KB	105.68KB	fineweb-2
ots_Latn	14	11	25	3396	2668	6064	20.89KB	33.62KB	54.51KB	fineweb-2
sck_Deva	14	13	27	5912	5490	11402	95.68KB	74.18KB	169.87KB	fineweb-2
wap_Latn	14	6	20	11901	5100	17001	116.77KB	52.55KB	169.31KB	fineweb-2
whg_Latn	14	12	26	9482	8127	17609	26.25KB	97.34KB	123.59KB	fineweb-2
zpj_Latn	14	7	21	5263	2631	7894	34.80KB	27.65KB	62.45KB	fineweb-2
brb_Khmr	14	6	20	10782	4620	15402	108.53KB	70.66KB	179.19KB	mala
dty_Latn	14	6	20	964	413	1377	15.70KB	11.36KB	27.07KB	mala
tkr_Latn	14	10	24	275	196	471	9.71KB	9.24KB	18.95KB	mala
cdf_Latn	13	3	16	7112	1641	8753	43.99KB	31.83KB	75.82KB	fineweb-2
fuv_Arab	13	3	16	6420	1481	7901	103.31KB	5.81KB	109.12KB	fineweb-2
mns_Cyrl	13	6	19	5292	3121	8413	48.09KB	69.92KB	118.01KB	fineweb-2, mala
mpt_Latn	13	5	18	12403	4770	17173	82.94KB	37.73KB	120.67KB	fineweb-2
mrv_Latn	13	3	16	3462	799	4261	30.05KB	9.35KB	39.39KB	fineweb-2
nnw_Latn	13	6	19	5508	2542	8050	31.01KB	28.24KB	59.25KB	fineweb-2
nph_Latn	13	4	17	41798	12861	54659	47.88KB	432.27KB	480.15KB	fineweb-2
sfw_Latn	13	5	18	2468	949	3417	27.40KB	8.00KB	35.40KB	fineweb-2
taw_Latn	13	7	20	10245	5516	15761	73.54KB	35.06KB	108.60KB	fineweb-2
vmk_Latn	13	8	21	15986	9838	25824	196.71KB	26.15KB	222.86KB	fineweb-2
wsg_Telu	13	4	17	3077	947	4024	40.84KB	43.30KB	84.14KB	fineweb-2
dwr_Ethi	13	3	16	8647	1995	10642	108.27KB	24.42KB	132.69KB	mala
bih_Arab	13	7	20	292	156	448	6.51KB	3.69KB	10.20KB	New CC
bmk_Latn	12	4	16	11905	3968	15873	86.07KB	26.67KB	112.74KB	fineweb-2
bqj_Latn	12	7	19	5446	3176	8622	39.53KB	28.74KB	68.26KB	fineweb-2
cnw_Latn	12	7	19	34543	20150	54693	109.30KB	156.96KB	266.26KB	fineweb-2
crl_Cans	12	8	20	8825	5883	14708	119.48KB	78.57KB	198.05KB	fineweb-2
enl_Latn	12	4	16	48723	16241	64964	337.45KB	226.37KB	563.82KB	fineweb-2
ife_Latn	12	4	16	3363	1121	4484	25.64KB	17.50KB	43.13KB	fineweb-2
iqw_Latn	12	5	17	6589	2745	9334	59.65KB	21.53KB	81.18KB	fineweb-2
kmd_Latn	12	10	22	3642	3035	6677	41.07KB	20.66KB	61.73KB	fineweb-2
ktj_Latn	12	2	14	16885	2814	19699	123.60KB	2.26KB	125.87KB	fineweb-2
nfa_Latn	12	5	17	10333	4305	14638	73.98KB	27.98KB	101.96KB	fineweb-2
nmw_Latn	12	6	18	4299	2149	6448	34.16KB	20.95KB	55.11KB	fineweb-2
nre_Latn	12	5	17	4284	1785	6069	28.11KB	23.05KB	51.16KB	fineweb-2
sba_Latn	12	6	18	26161	13080	39241	205.13KB	16.67KB	221.80KB	fineweb-2
shu_Arab	12	6	18	3960	1980	5940	57.74KB	38.38KB	96.12KB	fineweb-2
tui_Latn	12	8	20	3667	2444	6111	20.83KB	24.95KB	45.78KB	fineweb-2
ury_Latn	12	6	18	5430	2715	8145	30.76KB	29.95KB	60.70KB	fineweb-2
cmo_Khmr	12	6	18	1110	555	1665	90.56KB	47.13KB	137.69KB	mala
nut_Latn	12	5	17	6448	2686	9134	24.07KB	37.76KB	61.83KB	mala
pli_Latn	12	2	14	7818	1303	9121	25.54KB	82.34KB	107.88KB	mala
xkg_Latn	12	6	18	4921	2460	7381	28.65KB	16.13KB	44.78KB	mala
ajz_Latn	11	4	15	2293	791	3084	23.51KB	15.13KB	38.63KB	fineweb-2, mala
bgz_Latn	11	3	14	3601	982	4583	18.54KB	22.14KB	40.68KB	fineweb-2
byv_Latn	11	2	13	2994	544	3538	19.77KB	6.22KB	25.99KB	fineweb-2
dtb_Latn	11	9	20	3297	2698	5995	20.54KB	33.03KB	53.56KB	fineweb-2
gvc_Latn	11	3	14	4898	1336	6234	45.79KB	5.87KB	51.66KB	fineweb-2
kpj_Latn	11	6	17	4238	2311	6549	49.16KB	22.76KB	71.91KB	fineweb-2
kru_Deva	11	4	15	6182	2248	8430	101.38KB	33.99KB	135.36KB	fineweb-2
lnd_Latn	11	5	16	6031	2741	8772	37.71KB	26.74KB	64.45KB	fineweb-2
mfh_Latn	11	8	19	7823	5857	13680	64.75KB	26.06KB	90.80KB	fineweb-2, mala
msc_Latn	11	3	14	10549	2877	13426	69.03KB	7.78KB	76.81KB	fineweb-2
nzm_Latn	11	5	16	4331	1968	6299	27.36KB	17.41KB	44.77KB	fineweb-2
sld_Latn	11	4	15	2166	787	2953	17.87KB	14.89KB	32.76KB	fineweb-2
tem_Latn	11	5	16	5589	2540	8129	33.37KB	24.77KB	58.15KB	fineweb-2
way_Latn	11	3	14	5643	1539	7182	44.10KB	25.51KB	69.61KB	fineweb-2
yup_Latn	11	4	15	3936	1431	5367	20.88KB	27.81KB	48.69KB	fineweb-2
kwu_Latn	11	4	15	11632	4230	15862	29.14KB	64.72KB	93.86KB	mala
wni_Latn	11	3	14	3215	877	4092	25.42KB	4.97KB	30.40KB	mala
bcw_Latn	10	4	14	13645	5458	19103	77.28KB	49.30KB	126.58KB	fineweb-2
fmu_Deva	10	4	14	2192	877	3069	30.40KB	35.09KB	65.49KB	fineweb-2
hop_Latn	10	6	16	2962	1777	4739	24.67KB	20.48KB	45.14KB	fineweb-2
ker_Latn	10	3	13	10341	3102	13443	42.18KB	61.22KB	103.40KB	fineweb-2
kia_Latn	10	3	13	2236	671	2907	15.72KB	8.00KB	23.73KB	fineweb-2
lom_Latn	10	3	13	1936	580	2516	19.64KB	4.64KB	24.28KB	fineweb-2
mev_Latn	10	4	14	2196	878	3074	21.79KB	6.23KB	28.02KB	fineweb-2
mip_Latn	10	2	12	4958	991	5949	35.67KB	6.52KB	42.19KB	fineweb-2
mta_Latn	10	4	14	14381	5752	20133	79.51KB	42.42KB	121.93KB	fineweb-2
nse_Latn	10	2	12	2679	535	3214	22.66KB	8.29KB	30.94KB	fineweb-2
ojb_Latn	10	3	13	6196	1859	8055	51.93KB	27.81KB	79.75KB	fineweb-2
qvo_Latn	10	5	15	2484	1574	4058	28.67KB	20.36KB	49.03KB	fineweb-2, mala
tnc_Latn	10	3	13	5069	1520	6589	56.41KB	20.66KB	77.07KB	fineweb-2
wlv_Latn	10	4	14	12507	5003	17510	17.76KB	108.74KB	126.50KB	fineweb-2
wlx_Latn	10	4	14	14074	5629	19703	99.12KB	18.91KB	118.03KB	fineweb-2
xsu_Latn	10	7	17	25928	18149	44077	10.08KB	227.27KB	237.35KB	fineweb-2
anp_Latn	10	5	15	5027	2513	7540	34.10KB	54.42KB	88.51KB	mala
arc_Latn	10	8	18	412	329	741	9.11KB	8.59KB	17.70KB	mala
gwc_Arab	10	6	16	1235	741	1976	13.33KB	9.42KB	22.75KB	mala
aim_Latn	9	4	13	3261	1449	4710	19.45KB	15.58KB	35.03KB	fineweb-2
cgg_Latn	9	4	13	1097	487	1584	13.91KB	8.94KB	22.85KB	fineweb-2
cul_Latn	9	5	14	7968	4426	12394	72.09KB	46.39KB	118.47KB	fineweb-2
fud_Latn	9	4	13	9792	4352	14144	13.40KB	69.43KB	82.84KB	fineweb-2
guq_Latn	9	3	12	4899	1633	6532	40.71KB	10.37KB	51.08KB	fineweb-2
jmc_Latn	9	4	13	4329	1924	6253	20.65KB	32.27KB	52.92KB	fineweb-2
lgl_Latn	9	2	11	7488	1664	9152	44.08KB	10.49KB	54.57KB	fineweb-2
mda_Latn	9	2	11	3962	880	4842	36.27KB	7.88KB	44.15KB	fineweb-2
mkl_Latn	9	3	12	6507	2169	8676	42.43KB	11.94KB	54.38KB	fineweb-2
ncq_Laoo	9	5	14	7349	4082	11431	116.01KB	45.38KB	161.39KB	fineweb-2
pkb_Latn	9	2	11	4053	803	4856	23.56KB	18.24KB	41.81KB	fineweb-2, mala
pne_Latn	9	3	12	13476	4492	17968	41.02KB	74.95KB	115.97KB	fineweb-2
poe_Latn	9	2	11	5971	1326	7297	47.47KB	9.24KB	56.71KB	fineweb-2
yam_Latn	9	2	11	33007	7334	40341	59.71KB	173.77KB	233.49KB	fineweb-2
jmx_Latn	9	5	14	1661	923	2584	15.04KB	10.92KB	25.96KB	mala
tdg_Deva	9	6	15	621	414	1035	14.31KB	11.24KB	25.55KB	mala
the_Deva	9	3	12	3451	1150	4601	47.64KB	30.46KB	78.10KB	mala
tkr_Cyrl	9	5	14	207	115	322	4.79KB	4.87KB	9.66KB	mala
zha_Hani	9	7	16	981	763	1744	40.27KB	83.40KB	123.67KB	mala
ade_Latn	8	6	14	13313	9984	23297	36.78KB	123.38KB	160.16KB	fineweb-2
chq_Latn	8	2	10	7186	1796	8982	63.48KB	10.14KB	73.62KB	fineweb-2
lem_Latn	8	2	10	3773	943	4716	28.94KB	12.35KB	41.29KB	fineweb-2
nbc_Latn	8	6	14	3280	2460	5740	22.67KB	32.24KB	54.91KB	fineweb-2
njm_Latn	8	4	12	9938	4969	14907	40.55KB	63.24KB	103.79KB	fineweb-2
nst_Latn	8	3	11	2960	1153	4113	23.05KB	11.31KB	34.36KB	fineweb-2, mala
pww_Thai	8	4	12	4422	2211	6633	9.63KB	66.02KB	75.65KB	fineweb-2
sju_Latn	8	2	10	2730	682	3412	30.46KB	10.99KB	41.45KB	fineweb-2
trs_Latn	8	3	11	2626	985	3611	26.13KB	5.03KB	31.17KB	fineweb-2
vot_Latn	8	3	11	897	336	1233	12.09KB	8.23KB	20.32KB	fineweb-2
zca_Latn	8	4	12	4494	2247	6741	34.79KB	9.46KB	44.25KB	fineweb-2
szy_Hani	8	7	15	448	392	840	7.05KB	36.95KB	44.00KB	mala
udi_Cyrl	8	8	16	155	155	310	3.94KB	4.51KB	8.46KB	mala
xal_Latn	8	5	13	2233	1396	3629	7.13KB	34.37KB	41.50KB	mala
bth_Latn	7	2	9	3802	1086	4888	21.11KB	15.79KB	36.91KB	fineweb-2
cho_Latn	7	3	10	1901	815	2716	14.20KB	13.09KB	27.29KB	fineweb-2
cjo_Latn	7	3	10	10089	4324	14413	42.82KB	119.21KB	162.03KB	fineweb-2
crj_Cans	7	2	9	1413	403	1816	17.06KB	12.17KB	29.23KB	fineweb-2
due_Latn	7	3	10	1618	693	2311	19.12KB	6.38KB	25.50KB	fineweb-2
gjn_Latn	7	5	12	4341	3100	7441	18.02KB	26.71KB	44.73KB	fineweb-2
kby_Latn	7	3	10	1406	999	2405	12.10KB	11.61KB	23.71KB	fineweb-2, mala
khq_Latn	7	3	10	580	248	828	9.24KB	5.10KB	14.34KB	fineweb-2
lap_Latn	7	5	12	3312	2366	5678	24.51KB	20.25KB	44.76KB	fineweb-2
met_Latn	7	2	9	2858	816	3674	24.35KB	5.97KB	30.31KB	fineweb-2
mhy_Latn	7	4	11	7091	4052	11143	29.62KB	58.72KB	88.34KB	fineweb-2
mkz_Latn	7	3	10	477	182	659	5.05KB	2.10KB	7.15KB	fineweb-2, mala
mnb_Latn	7	3	10	4925	2110	7035	29.27KB	30.16KB	59.43KB	fineweb-2
mtg_Latn	7	5	12	4729	3378	8107	14.91KB	46.88KB	61.79KB	fineweb-2
myb_Latn	7	3	10	2926	1254	4180	18.55KB	10.21KB	28.76KB	fineweb-2
niy_Latn	7	3	10	4744	2033	6777	36.18KB	24.16KB	60.35KB	fineweb-2
pxm_Latn	7	3	10	2709	1161	3870	13.12KB	23.92KB	37.03KB	fineweb-2
swk_Latn	7	4	11	7405	4231	11636	23.60KB	67.17KB	90.77KB	fineweb-2
tbk_Latn	7	2	9	545	155	700	7.78KB	2.86KB	10.64KB	fineweb-2
tmc_Latn	7	3	10	3595	1540	5135	12.69KB	27.44KB	40.12KB	fineweb-2
tpm_Latn	7	3	10	3693	1583	5276	26.54KB	4.90KB	31.44KB	fineweb-2
wob_Latn	7	3	10	2100	900	3000	16.96KB	7.57KB	24.53KB	fineweb-2
bwx_Latn	7	6	13	666	571	1237	4.92KB	5.64KB	10.57KB	mala
cre_Cans	7	6	13	191	163	354	4.52KB	5.42KB	9.94KB	mala
pnt_Latn	7	3	10	607	260	867	8.38KB	4.74KB	13.12KB	mala
sdk_Latn	7	3	10	2953	1265	4218	20.03KB	11.08KB	31.11KB	mala
skr_Latn	7	6	13	4681	4013	8694	26.85KB	45.81KB	72.66KB	mala
awb_Latn	6	2	8	3362	1264	4626	37.08KB	4.67KB	41.74KB	fineweb-2, mala
bav_Latn	6	4	10	18907	12605	31512	12.59KB	163.79KB	176.38KB	fineweb-2
duo_Latn	6	4	10	5186	3457	8643	35.28KB	16.99KB	52.27KB	fineweb-2
etu_Latn	6	4	10	1780	1186	2966	17.83KB	8.72KB	26.55KB	fineweb-2
kex_Deva	6	1	7	1269	211	1480	26.06KB	4.04KB	30.10KB	fineweb-2
kno_Latn	6	4	10	2795	1863	4658	11.24KB	16.69KB	27.93KB	fineweb-2
knx_Latn	6	1	7	1368	228	1596	15.46KB	2.08KB	17.54KB	fineweb-2
ktz_Latn	6	1	7	1878	313	2191	14.67KB	1.60KB	16.26KB	fineweb-2
led_Latn	6	3	9	1524	762	2286	12.17KB	7.45KB	19.62KB	fineweb-2
lis_Lisu	6	8	14	734	978	1712	10.51KB	15.05KB	25.56KB	fineweb-2
loe_Latn	6	2	8	1620	540	2160	17.45KB	2.89KB	20.33KB	fineweb-2
lun_Latn	6	2	8	4999	1666	6665	55.87KB	4.58KB	60.45KB	fineweb-2
mbd_Latn	6	5	11	5009	4174	9183	43.09KB	17.93KB	61.02KB	fineweb-2
mcn_Latn	6	4	10	3366	2244	5610	31.70KB	6.61KB	38.31KB	fineweb-2
mgo_Latn	6	3	9	2702	1232	3934	20.09KB	9.79KB	29.88KB	fineweb-2, mala
mzk_Latn	6	6	12	6035	6035	12070	18.65KB	48.24KB	66.89KB	fineweb-2
mzm_Latn	6	3	9	3775	1952	5727	20.28KB	15.93KB	36.21KB	fineweb-2, mala
naw_Latn	6	3	9	4812	2406	7218	34.79KB	12.73KB	47.51KB	fineweb-2
otn_Latn	6	2	8	4783	1594	6377	35.44KB	13.16KB	48.60KB	fineweb-2
pmq_Latn	6	5	11	5176	4314	9490	37.43KB	33.68KB	71.11KB	fineweb-2
rub_Latn	6	4	10	4192	2794	6986	39.44KB	27.23KB	66.67KB	fineweb-2
wat_Latn	6	2	8	1631	543	2174	17.79KB	3.09KB	20.88KB	fineweb-2
xbr_Latn	6	3	9	852	426	1278	8.31KB	5.79KB	14.11KB	fineweb-2
bkm_Latn	6	2	8	580	193	773	2.98KB	4.28KB	7.26KB	mala
bra_Deva	6	1	7	1415	235	1650	20.46KB	1.42KB	21.88KB	mala
jra_Khmr	6	1	7	234	39	273	4.95KB	512B	5.45KB	mala
stk_Latn	6	1	7	4451	741	5192	18.43KB	21.92KB	40.35KB	mala
tcy_Latn	6	4	10	2449	1632	4081	11.97KB	40.74KB	52.71KB	mala
tnl_Latn	6	3	9	497	248	745	5.67KB	2.14KB	7.81KB	mala
tnn_Latn	6	3	9	1184	592	1776	12.89KB	2.70KB	15.60KB	mala
bbk_Latn	5	0	5	2087	0	2087	16.96KB	0B	16.96KB	fineweb-2, mala
bhz_Latn	5	4	9	1765	1412	3177	14.37KB	17.08KB	31.45KB	fineweb-2
dip_Latn	5	1	6	2474	494	2968	16.20KB	3.32KB	19.52KB	fineweb-2
eka_Latn	5	4	9	833	667	1500	7.13KB	9.76KB	16.90KB	fineweb-2
gbr_Latn	5	2	7	2846	1138	3984	19.94KB	4.44KB	24.38KB	fineweb-2
gwr_Latn	5	2	7	2247	898	3145	20.38KB	9.85KB	30.23KB	fineweb-2
ifa_Latn	5	5	10	2593	2593	5186	29.94KB	8.99KB	38.92KB	fineweb-2
iri_Latn	5	3	8	2108	1264	3372	12.71KB	8.79KB	21.51KB	fineweb-2
ivv_Latn	5	1	6	1113	222	1335	11.10KB	3.86KB	14.96KB	fineweb-2
khy_Latn	5	4	9	758	606	1364	8.22KB	11.02KB	19.24KB	fineweb-2
kle_Deva	5	2	7	2450	980	3430	41.27KB	32.05KB	73.33KB	fineweb-2
kpq_Latn	5	3	8	1249	749	1998	10.60KB	8.24KB	18.84KB	fineweb-2
kvj_Latn	5	2	7	2931	1172	4103	17.04KB	11.25KB	28.29KB	fineweb-2
mfg_Latn	5	3	8	1095	657	1752	5.80KB	8.98KB	14.78KB	fineweb-2
mhw_Latn	5	2	7	14850	5940	20790	142.06KB	4.05KB	146.11KB	fineweb-2
mnf_Latn	5	1	6	641	61	702	7.14KB	1.06KB	8.21KB	fineweb-2, mala
moa_Latn	5	3	8	5632	3379	9011	22.12KB	36.51KB	58.63KB	fineweb-2
ndj_Latn	5	3	8	1563	938	2501	16.81KB	7.28KB	24.09KB	fineweb-2
rim_Latn	5	1	6	1905	381	2286	19.64KB	1.80KB	21.44KB	fineweb-2
rnl_Latn	5	1	6	3664	764	4428	21.72KB	4.26KB	25.99KB	fineweb-2, mala
sri_Latn	5	4	9	2658	2127	4785	28.83KB	23.24KB	52.07KB	fineweb-2
tcc_Latn	5	2	7	1830	732	2562	14.38KB	11.36KB	25.75KB	fineweb-2
zam_Latn	5	7	12	564	789	1353	6.60KB	11.16KB	17.75KB	fineweb-2
zpq_Latn	5	6	11	3198	3837	7035	20.22KB	43.34KB	63.55KB	fineweb-2
azn_Latn	5	2	7	1772	709	2481	14.88KB	2.77KB	17.65KB	mala
bfn_Latn	5	0	5	272	0	272	2.99KB	0B	2.99KB	mala
nxa_Latn	5	5	10	304	304	608	3.37KB	3.03KB	6.39KB	mala
tuz_Latn	5	3	8	3271	1963	5234	15.14KB	25.38KB	40.53KB	mala
ybi_Deva	5	3	8	1310	786	2096	20.29KB	25.76KB	46.05KB	mala
atq_Latn	4	6	10	905	1358	2263	7.42KB	16.92KB	24.34KB	fineweb-2
bea_Latn	4	1	5	2482	620	3102	22.51KB	5.42KB	27.92KB	fineweb-2
bkl_Latn	4	0	4	4565	0	4565	34.18KB	0B	34.18KB	fineweb-2
bkv_Latn	4	0	4	4464	0	4464	25.97KB	0B	25.97KB	fineweb-2
bsc_Latn	4	4	8	1694	1694	3388	7.91KB	18.38KB	26.30KB	fineweb-2
csk_Latn	4	0	4	1556	0	1556	14.80KB	0B	14.80KB	fineweb-2
ddn_Latn	4	1	5	1328	332	1660	14.83KB	2.19KB	17.02KB	fineweb-2
did_Latn	4	1	5	2248	562	2810	19.99KB	5.93KB	25.92KB	fineweb-2
dis_Latn	4	7	11	831	1455	2286	6.66KB	17.88KB	24.54KB	fineweb-2
diu_Latn	4	1	5	4080	1020	5100	40.71KB	1.77KB	42.48KB	fineweb-2
gej_Latn	4	3	7	2015	1511	3526	15.19KB	10.14KB	25.34KB	fineweb-2
giz_Latn	4	3	7	1976	1482	3458	13.61KB	10.83KB	24.45KB	fineweb-2
gna_Latn	4	3	7	574	431	1005	9.36KB	4.54KB	13.90KB	fineweb-2
kdj_Latn	4	2	6	1254	627	1881	10.66KB	6.23KB	16.89KB	fineweb-2
kff_Telu	4	2	6	12803	6401	19204	352.65KB	25.64KB	378.29KB	fineweb-2
ksj_Latn	4	1	5	2386	596	2982	21.46KB	7.17KB	28.63KB	fineweb-2
ktm_Latn	4	4	8	2469	2469	4938	25.50KB	7.55KB	33.04KB	fineweb-2
kyu_Kali	4	2	6	951	475	1426	21.64KB	13.94KB	35.59KB	fineweb-2
kzn_Latn	4	0	4	1443	0	1443	15.32KB	0B	15.32KB	fineweb-2
lhi_Latn	4	1	5	3104	776	3880	19.88KB	2.16KB	22.04KB	fineweb-2
lob_Latn	4	3	7	1096	822	1918	13.35KB	4.12KB	17.47KB	fineweb-2
moc_Latn	4	2	6	2237	1118	3355	25.34KB	4.42KB	29.76KB	fineweb-2
mph_Latn	4	3	7	5879	4409	10288	43.13KB	34.15KB	77.28KB	fineweb-2
mua_Latn	4	2	6	1093	546	1639	5.28KB	8.01KB	13.29KB	fineweb-2
mus_Latn	4	5	9	862	1077	1939	9.06KB	11.90KB	20.96KB	fineweb-2
mvn_Latn	4	0	4	2053	0	2053	18.79KB	0B	18.79KB	fineweb-2
nbe_Latn	4	0	4	1604	0	1604	13.06KB	0B	13.06KB	fineweb-2
ndi_Latn	4	1	5	1192	298	1490	12.60KB	1.00KB	13.60KB	fineweb-2
ngb_Latn	4	1	5	877	219	1096	8.00KB	1.14KB	9.14KB	fineweb-2
njz_Latn	4	1	5	732	183	915	9.35KB	1.52KB	10.88KB	fineweb-2
nlc_Latn	4	0	4	9126	0	9126	86.85KB	0B	86.85KB	fineweb-2
nma_Latn	4	2	6	2736	1368	4104	12.86KB	20.88KB	33.74KB	fineweb-2
npo_Latn	4	1	5	2224	556	2780	14.98KB	6.69KB	21.67KB	fineweb-2
nwb_Latn	4	1	5	1632	408	2040	14.39KB	4.48KB	18.87KB	fineweb-2
nxd_Latn	4	0	4	966	0	966	11.61KB	0B	11.61KB	fineweb-2
pbi_Latn	4	1	5	1624	406	2030	9.67KB	6.89KB	16.56KB	fineweb-2
pmx_Latn	4	3	7	1677	1257	2934	13.59KB	12.79KB	26.38KB	fineweb-2
pse_Latn	4	0	4	7818	0	7818	66.18KB	0B	66.18KB	fineweb-2
qus_Latn	4	3	7	7750	5813	13563	17.94KB	92.08KB	110.02KB	fineweb-2
rjs_Deva	4	2	6	4452	2226	6678	62.93KB	25.97KB	88.89KB	fineweb-2
sjo_Mong	4	4	8	1204	1204	2408	9.01KB	28.50KB	37.52KB	fineweb-2
ssx_Latn	4	2	6	3344	1672	5016	26.78KB	16.73KB	43.51KB	fineweb-2
tig_Ethi	4	2	6	543	271	814	10.38KB	4.38KB	14.77KB	fineweb-2
twb_Latn	4	1	5	712	178	890	6.97KB	2.36KB	9.33KB	fineweb-2
urw_Latn	4	2	6	2314	1157	3471	12.88KB	16.34KB	29.22KB	fineweb-2
ang_Runr	4	3	7	1237	927	2164	21.46KB	13.21KB	34.67KB	mala
awa_Latn	4	5	9	484	605	1089	2.78KB	14.50KB	17.28KB	mala
bkx_Latn	4	0	4	281	0	281	2.79KB	0B	2.79KB	mala
gal_Latn	4	3	7	338	254	592	3.45KB	2.11KB	5.56KB	mala
idt_Latn	4	2	6	349	174	523	3.08KB	1.54KB	4.62KB	mala
kbq_Latn	4	0	4	759	0	759	6.98KB	0B	6.98KB	mala
kpy_Armn	4	0	4	42	0	42	1.77KB	0B	1.77KB	mala
mrn_Latn	4	2	6	1244	622	1866	10.51KB	1.90KB	12.41KB	mala
nlv_Latn	4	3	7	1365	1023	2388	13.56KB	9.21KB	22.77KB	mala
oki_Latn	4	2	6	2239	1119	3358	15.89KB	13.43KB	29.32KB	mala
psp_Latn	4	1	5	472	118	590	3.51KB	1.58KB	5.10KB	mala
rki_Latn	4	3	7	2286	1715	4001	60.25KB	31.02KB	91.27KB	mala
sgd_Latn	4	1	5	276	69	345	3.08KB	492B	3.56KB	mala
tkd_Latn	4	0	4	253	0	253	2.61KB	0B	2.61KB	mala
ybh_Deva	4	0	4	1922	0	1922	45.01KB	0B	45.01KB	mala
ymp_Latn	4	1	5	1508	377	1885	8.93KB	3.92KB	12.85KB	mala
yux_Cyrl	4	3	7	50	37	87	2.11KB	1.67KB	3.78KB	mala
ayo_Latn	3	0	3	888	0	888	7.55KB	0B	7.55KB	fineweb-2
bex_Latn	3	0	3	2095	0	2095	14.28KB	0B	14.28KB	fineweb-2
bom_Latn	3	0	3	741	0	741	5.91KB	0B	5.91KB	fineweb-2
bov_Latn	3	0	3	711	0	711	6.21KB	0B	6.21KB	fineweb-2
btt_Latn	3	2	5	3274	2183	5457	27.14KB	2.29KB	29.43KB	fineweb-2
cou_Latn	3	2	5	1269	846	2115	9.69KB	7.71KB	17.40KB	fineweb-2
dhg_Latn	3	0	3	2756	0	2756	26.77KB	0B	26.77KB	fineweb-2
enx_Latn	3	1	4	1320	440	1760	14.70KB	1.26KB	15.96KB	fineweb-2
ess_Latn	3	1	4	451	150	601	4.25KB	5.92KB	10.16KB	fineweb-2
gud_Latn	3	1	4	1779	593	2372	16.86KB	1.06KB	17.92KB	fineweb-2
guu_Latn	3	1	4	1650	550	2200	13.44KB	4.03KB	17.47KB	fineweb-2
ilb_Latn	3	2	5	1638	1092	2730	22.19KB	4.48KB	26.67KB	fineweb-2
kdh_Latn	3	1	4	625	208	833	9.41KB	1.62KB	11.03KB	fineweb-2
kqs_Latn	3	0	3	1951	0	1951	13.46KB	0B	13.46KB	fineweb-2
krx_Latn	3	1	4	3111	1037	4148	27.69KB	3.01KB	30.71KB	fineweb-2
ldn_Latn	3	0	3	553	0	553	5.34KB	0B	5.34KB	fineweb-2
log_Latn	3	0	3	2161	0	2161	15.73KB	0B	15.73KB	fineweb-2
lol_Latn	3	0	3	529	0	529	5.64KB	0B	5.64KB	fineweb-2
maf_Latn	3	1	4	1767	589	2356	12.85KB	2.36KB	15.21KB	fineweb-2
max_Latn	3	1	4	550	183	733	7.65KB	1.40KB	9.04KB	fineweb-2
mfk_Latn	3	0	3	1894	0	1894	11.54KB	0B	11.54KB	fineweb-2
mgc_Latn	3	1	4	1453	484	1937	11.30KB	2.16KB	13.46KB	fineweb-2
mpg_Latn	3	3	6	2253	2253	4506	6.62KB	21.33KB	27.95KB	fineweb-2
mtj_Latn	3	0	3	5408	0	5408	37.48KB	0B	37.48KB	fineweb-2
mwm_Latn	3	1	4	2548	849	3397	14.91KB	8.68KB	23.59KB	fineweb-2
ndz_Latn	3	0	3	3666	0	3666	20.86KB	0B	20.86KB	fineweb-2
ngc_Latn	3	0	3	505	0	505	5.84KB	0B	5.84KB	fineweb-2
nio_Cyrl	3	0	3	11271	0	11271	148.81KB	0B	148.81KB	fineweb-2
nmz_Latn	3	2	5	1852	1234	3086	17.27KB	2.96KB	20.23KB	fineweb-2
nri_Latn	3	0	3	3543	0	3543	24.54KB	0B	24.54KB	fineweb-2
nuz_Latn	3	0	3	481	0	481	6.60KB	0B	6.60KB	fineweb-2
pps_Latn	3	0	3	803	0	803	7.35KB	0B	7.35KB	fineweb-2
sbd_Latn	3	2	5	239	159	398	2.93KB	2.50KB	5.43KB	fineweb-2
sbs_Latn	3	1	4	314	104	418	4.95KB	1.59KB	6.54KB	fineweb-2
shk_Latn	3	0	3	1719	0	1719	11.63KB	0B	11.63KB	fineweb-2
soe_Latn	3	2	5	208	139	347	3.33KB	2.69KB	6.02KB	fineweb-2
tlb_Latn	3	1	4	8565	2855	11420	26.32KB	76.04KB	102.36KB	fineweb-2
tlj_Latn	3	0	3	1169	0	1169	11.92KB	0B	11.92KB	fineweb-2
tpw_Latn	3	2	5	5709	3806	9515	12.59KB	74.34KB	86.92KB	fineweb-2
vut_Latn	3	0	3	4492	0	4492	28.30KB	0B	28.30KB	fineweb-2
wwa_Latn	3	1	4	2759	919	3678	21.16KB	1.16KB	22.33KB	fineweb-2
xnn_Latn	3	0	3	359	0	359	4.32KB	0B	4.32KB	fineweb-2
yim_Latn	3	1	4	3858	1286	5144	19.42KB	19.59KB	39.02KB	fineweb-2
bcc_Arab	3	0	3	183	0	183	2.46KB	0B	2.46KB	mala
boz_Latn	3	2	5	5026	3351	8377	31.42KB	14.97KB	46.38KB	mala
bze_Latn	3	1	4	5239	1746	6985	28.99KB	7.71KB	36.70KB	mala
chu_Latn	3	1	4	285	95	380	4.87KB	952B	5.80KB	mala
cuh_Latn	3	0	3	445	0	445	4.63KB	0B	4.63KB	mala
emp_Cyrl	3	0	3	245	0	245	4.96KB	0B	4.96KB	mala
hro_Latn	3	0	3	2197	0	2197	15.75KB	0B	15.75KB	mala
lwl_Thai	3	2	5	115	77	192	2.53KB	1.08KB	3.61KB	mala
mry_Latn	3	1	4	517	172	689	4.35KB	1.29KB	5.65KB	mala
nco_Latn	3	0	3	362	0	362	4.37KB	0B	4.37KB	mala
nxl_Latn	3	0	3	963	0	963	6.59KB	0B	6.59KB	mala
pex_Latn	3	0	3	408	0	408	2.88KB	0B	2.88KB	mala
tnp_Latn	3	0	3	213	0	213	2.41KB	0B	2.41KB	mala
vif_Latn	3	0	3	1127	0	1127	8.75KB	0B	8.75KB	mala
abi_Latn	2	1	3	774	387	1161	8.24KB	1.50KB	9.74KB	fineweb-2
aha_Latn	2	0	2	189	0	189	2.86KB	0B	2.86KB	fineweb-2
avn_Latn	2	0	2	2014	0	2014	11.90KB	0B	11.90KB	fineweb-2
bkq_Latn	2	3	5	582	873	1455	10.63KB	6.13KB	16.76KB	fineweb-2
bmv_Latn	2	0	2	160	0	160	2.92KB	0B	2.92KB	fineweb-2
chj_Latn	2	0	2	1817	0	1817	17.02KB	0B	17.02KB	fineweb-2
crt_Latn	2	0	2	3232	0	3232	24.80KB	0B	24.80KB	fineweb-2
dos_Latn	2	0	2	1204	0	1204	8.75KB	0B	8.75KB	fineweb-2
eto_Latn	2	0	2	4142	0	4142	33.13KB	0B	33.13KB	fineweb-2
gof_Ethi	2	2	4	823	823	1646	12.03KB	8.68KB	20.71KB	fineweb-2
gya_Latn	2	0	2	1378	0	1378	9.60KB	0B	9.60KB	fineweb-2
hlt_Latn	2	0	2	2639	0	2639	14.29KB	0B	14.29KB	fineweb-2
jaa_Latn	2	1	3	882	441	1323	7.44KB	4.22KB	11.66KB	fineweb-2
jun_Orya	2	0	2	371	0	371	7.75KB	0B	7.75KB	fineweb-2
kqy_Ethi	2	1	3	1102	551	1653	9.72KB	8.57KB	18.28KB	fineweb-2
ksb_Latn	2	1	3	522	261	783	6.43KB	1.68KB	8.11KB	fineweb-2
ktb_Ethi	2	0	2	570	0	570	8.34KB	0B	8.34KB	fineweb-2
lgm_Latn	2	0	2	877	0	877	7.24KB	0B	7.24KB	fineweb-2
mnx_Latn	2	0	2	632	0	632	5.12KB	0B	5.12KB	fineweb-2
mor_Latn	2	0	2	2391	0	2391	20.33KB	0B	20.33KB	fineweb-2
mug_Latn	2	0	2	1859	0	1859	10.78KB	0B	10.78KB	fineweb-2
nng_Latn	2	0	2	651	0	651	6.79KB	0B	6.79KB	fineweb-2
nnh_Latn	2	0	2	4313	0	4313	26.70KB	0B	26.70KB	fineweb-2
nsa_Latn	2	0	2	1417	0	1417	12.36KB	0B	12.36KB	fineweb-2
nyk_Latn	2	0	2	465	0	465	5.08KB	0B	5.08KB	fineweb-2
otd_Latn	2	0	2	8138	0	8138	75.40KB	0B	75.40KB	fineweb-2
oym_Latn	2	0	2	2470	0	2470	18.24KB	0B	18.24KB	fineweb-2
phm_Latn	2	0	2	631	0	631	6.67KB	0B	6.67KB	fineweb-2
prq_Latn	2	0	2	935	0	935	10.57KB	0B	10.57KB	fineweb-2
saj_Latn	2	0	2	259	0	259	3.72KB	0B	3.72KB	fineweb-2
szb_Latn	2	0	2	8583	0	8583	72.78KB	0B	72.78KB	fineweb-2
tap_Latn	2	0	2	6313	0	6313	43.28KB	0B	43.28KB	fineweb-2
tnr_Latn	2	0	2	1237	0	1237	9.65KB	0B	9.65KB	fineweb-2
tzl_Latn	2	0	2	168	0	168	2.83KB	0B	2.83KB	fineweb-2
wew_Latn	2	0	2	531	0	531	4.81KB	0B	4.81KB	fineweb-2
yas_Latn	2	0	2	1013	0	1013	8.99KB	0B	8.99KB	fineweb-2, mala
yuz_Latn	2	0	2	653	0	653	7.10KB	0B	7.10KB	fineweb-2
avk_Cyrl	2	0	2	855	0	855	11.79KB	0B	11.79KB	mala
bat_Cyrl	2	0	2	141	0	141	2.80KB	0B	2.80KB	mala
baw_Latn	2	0	2	806	0	806	5.79KB	0B	5.79KB	mala
bwt_Latn	2	0	2	931	0	931	6.01KB	0B	6.01KB	mala
bxa_Latn	2	0	2	770	0	770	5.05KB	0B	5.05KB	mala
cjs_Latn	2	0	2	88	0	88	3.11KB	0B	3.11KB	mala
csw_Cans	2	0	2	1889	0	1889	22.96KB	0B	22.96KB	mala
fli_Latn	2	0	2	1207	0	1207	7.65KB	0B	7.65KB	mala
kqr_Latn	2	0	2	404	0	404	2.88KB	0B	2.88KB	mala
lan_Latn	2	0	2	1269	0	1269	9.44KB	0B	9.44KB	mala
lgr_Latn	2	0	2	309	0	309	2.14KB	0B	2.14KB	mala
lhm_Deva	2	0	2	227	0	227	4.60KB	0B	4.60KB	mala
mlk_Latn	2	0	2	612	0	612	5.60KB	0B	5.60KB	mala
mve_Arab	2	0	2	37	0	37	1.00KB	0B	1.00KB	mala
nqo_Latn	2	0	2	800	0	800	10.63KB	0B	10.63KB	mala
odk_Arab	2	0	2	552	0	552	5.96KB	0B	5.96KB	mala
olo_Cyrl	2	0	2	552	0	552	8.77KB	0B	8.77KB	mala
otq_Arab	2	0	2	515	0	515	10.59KB	0B	10.59KB	mala
pea_Latn	2	0	2	224	0	224	2.10KB	0B	2.10KB	mala
saq_Latn	2	0	2	1095	0	1095	9.22KB	0B	9.22KB	mala
sea_Latn	2	0	2	1886	0	1886	11.70KB	0B	11.70KB	mala
sps_Latn	2	0	2	293	0	293	2.10KB	0B	2.10KB	mala
tio_Latn	2	0	2	720	0	720	4.35KB	0B	4.35KB	mala
trv_Hani	2	1	3	22	11	33	5.55KB	552B	6.09KB	mala
yin_Latn	2	0	2	589	0	589	3.63KB	0B	3.63KB	mala
adl_Latn	1	0	1	657	0	657	4.52KB	0B	4.52KB	fineweb-2
agw_Latn	1	0	1	770	0	770	4.42KB	0B	4.42KB	fineweb-2
ald_Latn	1	0	1	119	0	119	1.75KB	0B	1.75KB	fineweb-2
asg_Latn	1	0	1	420	0	420	3.00KB	0B	3.00KB	fineweb-2
bnj_Latn	1	0	1	292	0	292	2.69KB	0B	2.69KB	fineweb-2
brx_Latn	1	0	1	145	0	145	2.05KB	0B	2.05KB	fineweb-2
bwi_Latn	1	0	1	111	0	111	1.74KB	0B	1.74KB	fineweb-2
bwq_Latn	1	0	1	79	0	79	1.15KB	0B	1.15KB	fineweb-2
dbq_Latn	1	0	1	1258	0	1258	8.55KB	0B	8.55KB	fineweb-2
dow_Latn	1	0	1	1049	0	1049	6.36KB	0B	6.36KB	fineweb-2
fad_Latn	1	0	1	1606	0	1606	9.84KB	0B	9.84KB	fineweb-2
hoc_Latn	1	0	1	82	0	82	1.50KB	0B	1.50KB	fineweb-2
kql_Latn	1	0	1	951	0	951	8.52KB	0B	8.52KB	fineweb-2
ksf_Latn	1	0	1	531	0	531	3.77KB	0B	3.77KB	fineweb-2
kyu_Mymr	1	0	1	498	0	498	10.30KB	0B	10.30KB	fineweb-2
lea_Latn	1	0	1	283	0	283	3.46KB	0B	3.46KB	fineweb-2
mse_Latn	1	0	1	477	0	477	3.06KB	0B	3.06KB	fineweb-2
mzl_Latn	1	0	1	1235	0	1235	12.25KB	0B	12.25KB	fineweb-2
nhd_Latn	1	0	1	75	0	75	1.30KB	0B	1.30KB	fineweb-2
nnl_Latn	1	0	1	82	0	82	1.72KB	0B	1.72KB	fineweb-2
nwx_Deva	1	0	1	623	0	623	10.55KB	0B	10.55KB	fineweb-2
pbc_Latn	1	0	1	59	0	59	1.12KB	0B	1.12KB	fineweb-2
pny_Latn	1	0	1	207	0	207	2.34KB	0B	2.34KB	fineweb-2
tul_Latn	1	0	1	659	0	659	4.16KB	0B	4.16KB	fineweb-2
aaa_Latn	1	0	1	16	0	16	462B	0B	462B	mala
abc_Latn	1	0	1	70	0	70	863B	0B	863B	mala
agq_Latn	1	0	1	545	0	545	3.71KB	0B	3.71KB	mala
ags_Latn	1	0	1	405	0	405	3.29KB	0B	3.29KB	mala
azo_Latn	1	0	1	460	0	460	3.64KB	0B	3.64KB	mala
bag_Latn	1	0	1	368	0	368	3.31KB	0B	3.31KB	mala
bax_Latn	1	0	1	404	0	404	3.53KB	0B	3.53KB	mala
bce_Latn	1	0	1	299	0	299	2.53KB	0B	2.53KB	mala
bfm_Latn	1	0	1	426	0	426	2.96KB	0B	2.96KB	mala
bgf_Latn	1	0	1	418	0	418	2.51KB	0B	2.51KB	mala
bhs_Latn	1	0	1	531	0	531	3.16KB	0B	3.16KB	mala
bkc_Latn	1	0	1	63	0	63	713B	0B	713B	mala
bkh_Latn	1	0	1	487	0	487	3.50KB	0B	3.50KB	mala
blk_Latn	1	0	1	422	0	422	10.63KB	0B	10.63KB	mala
bob_Latn	1	0	1	99	0	99	1004B	0B	1004B	mala
bqm_Latn	1	0	1	375	0	375	2.39KB	0B	2.39KB	mala
bri_Latn	1	0	1	493	0	493	3.81KB	0B	3.81KB	mala
brv_Laoo	1	0	1	263	0	263	3.77KB	0B	3.77KB	mala
buo_Latn	1	0	1	264	0	264	2.08KB	0B	2.08KB	mala
bya_Latn	1	0	1	103	0	103	953B	0B	953B	mala
chp_Cans	1	0	1	892	0	892	8.86KB	0B	8.86KB	mala
cim_Latn	1	0	1	157	0	157	1.23KB	0B	1.23KB	mala
clo_Latn	1	0	1	180	0	180	2.11KB	0B	2.11KB	mala
cuv_Latn	1	0	1	503	0	503	3.09KB	0B	3.09KB	mala
dag_Arab	1	0	1	26	0	26	686B	0B	686B	mala
dmg_Latn	1	0	1	159	0	159	1.49KB	0B	1.49KB	mala
dtr_Latn	1	0	1	140	0	140	1.13KB	0B	1.13KB	mala
eee_Thai	1	0	1	37	0	37	3.77KB	0B	3.77KB	mala
ekm_Latn	1	0	1	302	0	302	3.01KB	0B	3.01KB	mala
enc_Latn	1	0	1	34	0	34	484B	0B	484B	mala
fiu_Cyrl	1	0	1	55	0	55	735B	0B	735B	mala
gbj_Orya	1	0	1	468	0	468	8.77KB	0B	8.77KB	mala
gou_Latn	1	0	1	713	0	713	4.27KB	0B	4.27KB	mala
hao_Latn	1	0	1	251	0	251	1.51KB	0B	1.51KB	mala
hna_Latn	1	0	1	418	0	418	2.72KB	0B	2.72KB	mala
hre_Latn	1	0	1	1307	0	1307	7.63KB	0B	7.63KB	mala
isu_Latn	1	0	1	766	0	766	4.87KB	0B	4.87KB	mala
jgo_Latn	1	0	1	352	0	352	3.04KB	0B	3.04KB	mala
kbx_Latn	1	0	1	193	0	193	1.35KB	0B	1.35KB	mala
kvt_Latn	1	0	1	182	0	182	1.20KB	0B	1.20KB	mala
kxp_Arab	1	0	1	20	0	20	507B	0B	507B	mala
lfa_Latn	1	0	1	384	0	384	3.73KB	0B	3.73KB	mala
lkb_Latn	1	0	1	246	0	246	2.10KB	0B	2.10KB	mala
lns_Latn	1	0	1	499	0	499	3.43KB	0B	3.43KB	mala
loh_Latn	1	0	1	83	0	83	906B	0B	906B	mala
lrc_Latn	1	0	1	69	0	69	1.17KB	0B	1.17KB	mala
lts_Latn	1	0	1	48	0	48	711B	0B	711B	mala
luy_Latn	1	0	1	68	0	68	820B	0B	820B	mala
mdr_Latn	1	0	1	164	0	164	1.33KB	0B	1.33KB	mala
mfj_Latn	1	0	1	472	0	472	2.90KB	0B	2.90KB	mala
mgg_Latn	1	0	1	458	0	458	3.14KB	0B	3.14KB	mala
mgq_Latn	1	0	1	220	0	220	2.31KB	0B	2.31KB	mala
mle_Latn	1	0	1	79	0	79	766B	0B	766B	mala
mlw_Latn	1	0	1	403	0	403	3.15KB	0B	3.15KB	mala
mmu_Latn	1	0	1	321	0	321	3.19KB	0B	3.19KB	mala
mne_Latn	1	0	1	33	0	33	936B	0B	936B	mala
muv_Mlym	1	0	1	77	0	77	2.36KB	0B	2.36KB	mala
mxu_Latn	1	0	1	325	0	325	2.35KB	0B	2.35KB	mala
nge_Latn	1	0	1	415	0	415	3.10KB	0B	3.10KB	mala
ngn_Latn	1	0	1	440	0	440	2.67KB	0B	2.67KB	mala
njy_Latn	1	0	1	520	0	520	3.49KB	0B	3.49KB	mala
nsk_Cans	1	0	1	196	0	196	3.21KB	0B	3.21KB	mala
nsk_Latn	1	0	1	167	0	167	2.50KB	0B	2.50KB	mala
nwe_Latn	1	0	1	322	0	322	2.54KB	0B	2.54KB	mala
nza_Latn	1	0	1	401	0	401	2.46KB	0B	2.46KB	mala
pcg_Mlym	1	0	1	99	0	99	2.30KB	0B	2.30KB	mala
pdu_Latn	1	0	1	95	0	95	817B	0B	817B	mala
pnz_Latn	1	0	1	393	0	393	3.09KB	0B	3.09KB	mala
pwg_Latn	1	0	1	105	0	105	935B	0B	935B	mala
qaa_Latn	1	0	1	73	0	73	967B	0B	967B	mala
shi_Arab	1	0	1	27	0	27	645B	0B	645B	mala
smn_Cyrl	1	0	1	238	0	238	2.71KB	0B	2.71KB	mala
snk_Arab	1	0	1	12	0	12	472B	0B	472B	mala
snl_Latn	1	0	1	90	0	90	981B	0B	981B	mala
sox_Latn	1	0	1	141	0	141	1.37KB	0B	1.37KB	mala
ssn_Latn	1	0	1	546	0	546	4.86KB	0B	4.86KB	mala
syw_Deva	1	0	1	43	0	43	1.12KB	0B	1.12KB	mala
tay_Hani	1	0	1	46	0	46	1.51KB	0B	1.51KB	mala
tbj_Latn	1	0	1	450	0	450	2.77KB	0B	2.77KB	mala
tdb_Deva	1	0	1	16	0	16	527B	0B	527B	mala
thy_Thai	1	0	1	44	0	44	2.88KB	0B	2.88KB	mala
tnt_Latn	1	0	1	555	0	555	4.46KB	0B	4.46KB	mala
tom_Latn	1	0	1	570	0	570	4.62KB	0B	4.62KB	mala
tpl_Latn	1	0	1	793	0	793	7.20KB	0B	7.20KB	mala
tsb_Latn	1	0	1	287	0	287	2.95KB	0B	2.95KB	mala
tvs_Latn	1	0	1	613	0	613	4.81KB	0B	4.81KB	mala
udg_Mlym	1	0	1	99	0	99	2.56KB	0B	2.56KB	mala
unr_Orya	1	0	1	21	0	21	763B	0B	763B	mala
wbr_Deva	1	0	1	208	0	208	3.26KB	0B	3.26KB	mala
wms_Latn	1	0	1	435	0	435	4.16KB	0B	4.16KB	mala
wnk_Latn	1	0	1	464	0	464	4.22KB	0B	4.22KB	mala
wtk_Latn	1	0	1	162	0	162	1.61KB	0B	1.61KB	mala
xmd_Latn	1	0	1	470	0	470	2.91KB	0B	2.91KB	mala
xmg_Latn	1	0	1	433	0	433	3.42KB	0B	3.42KB	mala
xty_Latn	1	0	1	211	0	211	1.60KB	0B	1.60KB	mala
yav_Latn	1	0	1	306	0	306	3.26KB	0B	3.26KB	mala
yea_Mlym	1	0	1	68	0	68	2.07KB	0B	2.07KB	mala
zuh_Latn	1	0	1	248	0	248	1.97KB	0B	1.97KB	mala
gmv_Latn	0	0	0	0	0	0	0B	0B	0B	fineweb-2
rej_Latn	0	0	0	0	0	0	0B	0B	0B	fineweb-2
und_Aghb	0	0	0	0	0	0	0B	0B	0B	fineweb-2
und_Armi	0	0	0	0	0	0	0B	0B	0B	fineweb-2
und_Bass	0	0	0	0	0	0	0B	0B	0B	fineweb-2
und_Batk	0	0	0	0	0	0	0B	0B	0B	fineweb-2
und_Cakm	0	0	0	0	0	0	0B	0B	0B	fineweb-2
und_Cpmn	0	0	0	0	0	0	0B	0B	0B	fineweb-2
und_Gong	0	0	0	0	0	0	0B	0B	0B	fineweb-2
und_Hano	0	0	0	0	0	0	0B	0B	0B	fineweb-2
und_Maka	0	0	0	0	0	0	0B	0B	0B	fineweb-2
und_Mero	0	0	0	0	0	0	0B	0B	0B	fineweb-2
und_Mult	0	0	0	0	0	0	0B	0B	0B	fineweb-2
und_Ogam	0	0	0	0	0	0	0B	0B	0B	fineweb-2
und_Orkh	0	0	0	0	0	0	0B	0B	0B	fineweb-2
und_Osge	0	0	0	0	0	0	0B	0B	0B	fineweb-2
und_Phnx	0	0	0	0	0	0	0B	0B	0B	fineweb-2
und_Sora	0	0	0	0	0	0	0B	0B	0B	fineweb-2
und_Sund	0	0	0	0	0	0	0B	0B	0B	fineweb-2
und_Tirh	0	0	0	0	0	0	0B	0B	0B	fineweb-2
und_Toto	0	0	0	0	0	0	0B	0B	0B	fineweb-2
und_Xpeo	0	0	0	0	0	0	0B	0B	0B	fineweb-2
