region	num_languages_in_region	keep_rows	remove_rows	total_rows	keep_tokens	remove_tokens	total_tokens	keep_disk_converted	remove_disk_converted	total_disk_converted	source
Eurasia	471	8055421788	649996862	8705418650	6780405512870	538369587912	7318775100782	34.39TB	9.10TB	43.48TB	 mala, New CC,fineweb-2,fineweb_en
Papunesia	521	231234353	25997698	257232051	69520667855	6128616803	75649284658	440.80GB	76.54GB	517.35GB	 mala, New CC,fineweb-2
Africa	566	54088865	4931883	59020748	12191927668	1038886561	13230814229	80.79GB	18.65GB	99.47GB	 mala, New CC,fineweb-2
North America	224	3499515	315343	3814858	641030983	58902512	699933495	3.51GB	927.61MB	4.42GB	 mala, New CC,fineweb-2
South America	179	576190	48020	624210	175501045	13629806	189130851	1022.90MB	303.05MB	1.29GB	 mala,fineweb-2
Australia	19	31376	3311	34687	4463687	669784	5133471	38.70MB	10.30MB	48.99MB	 mala,fineweb-2
