System,Training compute (FLOP),Training time (hours),Training hardware,Hardware quantity,Publication date,Domain,alpha,lg_alpha,mean_lg_alpha
Llama 3.1-405B,3.8e+25,2142.0,NVIDIA H100,16000.0,2024-07-23,Language,8.06953086902159e+110,110.90684828727257,110.90684828727257
OpenVLA,1.1e+23,336.0,NVIDIA A100,64.0,2024-06-13,"Robotics,Vision,Language",1.0230099999999999e-05,-4.990120121005735,37.06379218466855
MegaScale (Production),1.2e+25,504.0,NVIDIA A100,12288.0,2024-02-23,Language,8.06953086902159e+110,110.90684828727257,37.06379218466855
Gemini 1.0 Ultra,5.0000000001e+25,2400.0,Google TPU v4,55000.0,2023-12-06,"Multimodal,Language,Vision",2.840444216400475e+34,34.45338626457003,32.982314155386845
Nemotron-3-8B,1.8e+23,456.0,NVIDIA A100,1024.0,2023-11-15,Language,5.159910894777197e+37,37.71264220196816,37.06379218466855
SPHINX (Llama 2 13B),3.04e+22,290.0,NVIDIA A100,32.0,2023-11-13,"Vision,Language,Multimodal",6.455624695217272e+111,111.80993827426451,37.06379218466855
LLaVA 1.5,4.55e+22,24.0,NVIDIA A100,8.0,2023-11-05,"Multimodal,Language,Vision",2.5551000000000003e-06,-5.592592098034818,37.06379218466855
Skywork-13B,2.5e+23,940.0,NVIDIA A800,512.0,2023-10-30,Language,7.2517728771676935e+53,53.86044419369465,53.86044419369465
FinGPT-13B,1.6e+23,17.25,NVIDIA GeForce RTX 3090,1.0,2023-10-07,Language,2.5551000000000003e-06,-5.592592098034818,-5.592592098034818
Amazon Titan,4.8e+24,1152.0,NVIDIA A100,13760.0,2023-09-28,"Language,Image generation",1.6148352305094262e+26,26.20812821575,37.06379218466855
Falcon-180B,3.76e+24,4320.0,NVIDIA A100,4096.0,2023-09-06,Language,1839559282600142.8,15.264713788193923,37.06379218466855
PeptideBERT,4.9e+16,4.067,NVIDIA GeForce GTX 1080 Ti,1.0,2023-08-28,Biology,0.004900251148515025,-2.3097816608655544,-2.3097816608655544
Llama 2-70B,8.1e+23,1728.0,NVIDIA A100,1000.0,2023-07-18,Language,1.54599792783204e+47,47.189208907479,37.06379218466855
xTrimoPGLM -100B,6.2e+23,3912.0,NVIDIA A100,768.0,2023-07-06,Biology,1973815806076783.5,15.29530662242844,37.06379218466855
Pangu-Weather,3.98e+22,1536.0,NVIDIA V100,192.0,2023-07-05,Earth science,48862503.19548751,7.688975711842096,2.277226174930846
HyenaDNA,1.811e+21,672.0,NVIDIA A100,8.0,2023-06-27,Biology,6.114267744206076e+33,33.786344452591784,37.06379218466855
InstructBLIP,1.94e+20,36.0,NVIDIA A100,16.0,2023-05-11,"Multimodal,Language,Vision",5.609203991643275e+34,34.748901234417445,37.06379218466855
StarCoder,8.46e+22,625.5,NVIDIA A100,512.0,2023-05-09,Language,1.7398079684502e+23,23.24050131561317,37.06379218466855
LLaVA,4.9e+19,10.0,NVIDIA A100,8.0,2023-04-17,"Multimodal,Vision,Language",7.3330153777869e+68,68.86528259573896,37.06379218466855
Segment Anything Model,7.8e+21,68.0,NVIDIA A100,256.0,2023-04-05,Vision,4.911402229004519e+46,46.691205502994116,37.06379218466855
BloombergGPT,2.36e+23,1270.0,NVIDIA A100,512.0,2023-03-30,Language,7.06955014925811e+34,34.84939177958041,37.06379218466855
VideoMAE V2,9.7e+21,336.0,NVIDIA A100,64.0,2023-03-29,Video,9.171383993139258e+46,46.962434877134584,37.06379218466855
PanGu-Σ,4.67e+23,2400.0,Huawei Ascend 910,512.0,2023-03-20,Language,4.327169293837069e+36,36.63620388663413,36.63620388663413
GPT-4,2.1e+25,2280.0,NVIDIA A100,25000.0,2023-03-15,"Multimodal,Language,Vision,Image generation",3.6936763670499235e+33,33.56745884070854,37.06379218466855
Falcon-40B,2.4e+23,1440.0,NVIDIA A100,384.0,2023-03-15,Language,3.1279646973074087e+43,43.49526184290088,37.06379218466855
LLaMA-65B,5.5e+23,500.0,NVIDIA A100,2048.0,2023-02-24,Language,4.6041233729837755e+55,55.66314695255293,37.06379218466855
BLIP-2 (Q-Former),1.20000000001e+21,200.0,NVIDIA A100,16.0,2023-01-30,"Vision,Language",4.0911983429587115e+38,38.61185053478178,37.06379218466855
Nucleotide Transformer,8.08e+21,672.0,NVIDIA A100,128.0,2023-01-15,Biology,1886.274335035134,3.275604855701235,37.06379218466855
CaLM,2.9e+19,960.0,NVIDIA Quadro RTX 4000,4.0,2022-12-19,Biology,1.2862606188105563e-06,-5.890671026867572,-5.890671026867572
EVA-01,3.7509433344e+21,348.0,NVIDIA A100,128.0,2022-11-14,Vision,244.2221180824752,2.3877849934589093,37.06379218466855
BLOOM-176B,3.65664e+23,2808.0,NVIDIA A100,384.0,2022-11-08,Language,5.699654481174495e+31,31.755848529099243,37.06379218466855
U-PaLM (540B),2.53e+24,120.0,Google TPU v4,512.0,2022-10-20,Language,1.0230099999999999e-05,-4.990120121005735,32.982314155386845
Flan-PaLM 540B,2.5e+24,37.0,Google TPU v4,512.0,2022-10-20,Language,1.02301e-05,-4.990120121005735,32.982314155386845
PaLI,6.2042112e+22,240.0,Google TPU v4,1024.0,2022-09-14,"Language,Vision,Multimodal",8.757523799851326e+21,21.942381326250086,32.982314155386845
GLM-130B,3.778e+23,1440.0,NVIDIA A100,768.0,2022-08-04,Language,1.1115344703650957e+32,32.045922935372595,37.06379218466855
AlexaTM 20B,2.04374016e+23,2880.0,NVIDIA A100,128.0,2022-08-02,Language,1.5586181771603762e+58,58.192739736829644,37.06379218466855
ESM2-15B,7.35000000001e+22,1440.0,NVIDIA V100,512.0,2022-07-21,Biology,1088.400324151482,3.036788662450957,2.277226174930846
Minerva (540B),2.7415e+24,696.0,Google TPU v4,1024.0,2022-06-29,Language,3.227812347608636e+111,111.50890827860053,32.982314155386845
CoCa,7.3e+22,120.0,Google TPU v4,2048.0,2022-06-14,Vision,2.0957084120749303e+27,27.321330856634127,32.982314155386845
Tranception,7.24e+21,336.0,NVIDIA A100,64.0,2022-05-27,Biology,1.4515917019156733e+33,33.16184447687315,37.06379218466855
Imagen,1.46e+22,96.0,Google TPU v4,256.0,2022-05-23,Image generation,1.4294893709472762e+64,64.15518093048372,32.982314155386845
Gato,4.02e+21,96.0,Google TPU v3,256.0,2022-05-12,"Multimodal,Robotics,Games,Language",1662042237537.197,12.220642056329108,8.2980622049184
UL2,1.2e+23,744.0,Google TPU v4,512.0,2022-05-10,Language,1.99686385486276e+29,29.300348455908548,32.982314155386845
OPT-175B,4.3e+23,793.5,NVIDIA A100,1024.0,2022-05-02,Language,6.45191424998719e+54,54.80968858670749,37.06379218466855
Flamingo,2.18972000000001e+23,360.0,Google TPU v4,1536.0,2022-04-29,"Multimodal,Vision,Language,Video",8.084559606532125e+38,38.90765636738872,32.982314155386845
Stable Diffusion (LDM-KL-8-G),5e+22,585.9375,NVIDIA A100,256.0,2022-04-13,Image generation,8.210181350970619e+31,31.914352750159146,37.06379218466855
PaLM (540B),2.5272e+24,1536.0,Google TPU v4,6144.0,2022-04-04,Language,1.6174766652176296e+22,22.208838023968053,32.982314155386845
ProteinBERT,6.5e+19,672.0,NVIDIA Quadro RTX 5000,1.0,2022-02-10,Biology,524100.5207490943,5.719414591220139,5.719414591220139
LaMDA,3.55e+23,1385.0,Google TPU v3,1024.0,2022-02-10,Language,8.375451917386117e+20,20.923008249730614,8.2980622049184
GPT-NeoX-20B,9.31627008e+22,2160.0,NVIDIA A100,96.0,2022-02-09,Language,5.770565015432029e+45,45.761218338471295,37.06379218466855
Detic,2.34399744e+19,24.0,NVIDIA V100,32.0,2022-01-07,Vision,0.004723141230430078,-2.325769068041884,2.277226174930846
XGLM-7.5B,2.25e+22,504.0,NVIDIA A100,256.0,2021-12-20,Language,6379796788320.5625,12.804806845630791,37.06379218466855
GLaM,3.74e+23,1366.0,Google TPU v4,1024.0,2021-12-13,Language,9.719981704266518e+22,22.987665447462945,32.982314155386845
Gopher (280B),6.31e+23,920.0,Google TPU v3,4096.0,2021-12-08,Language,31798272282.897556,10.502403523808715,8.2980622049184
Florence,4.831e+22,240.0,NVIDIA A100,512.0,2021-11-22,Vision,1.6474805162202882e+39,39.216820287151904,37.06379218466855
T0-XXL,9.1819e+20,27.0,Google TPU v3,256.0,2021-10-15,Language,1158385833.3824058,9.063853237629427,8.2980622049184
Megatron-Turing NLG 530B,1.17e+24,770.0,NVIDIA A100,4480.0,2021-10-11,Language,1.806201542572005e+31,31.25676620884754,37.06379218466855
AlphaFold-Multimer,4.35e+21,384.0,Google TPU v3,64.0,2021-10-04,Biology,69267327132450.41,13.840528429530531,8.2980622049184
SEER,4.42e+21,192.0,NVIDIA V100,512.0,2021-07-29,Vision,0.0020397505125876922,-2.6904229490606464,2.277226174930846
EfficientNetV2-XL,9.56e+19,45.0,Google TPU v3,16.0,2021-06-23,Vision,10929984411.574715,10.038619542556116,8.2980622049184
ALIGN,2.598670000001e+22,347.3,Google TPU v3,512.0,2021-06-11,"Multimodal,Vision,Language",1812408993.0163345,9.258256208444072,8.2980622049184
DeBERTa,2.588e+22,720.0,NVIDIA V100,256.0,2021-06-10,Language,355523028.6337877,8.55086773694951,2.277226174930846
MedBERT,9.47e+18,168.0,NVIDIA V100,1.0,2021-05-20,Medicine,28.37233782264756,1.4528951222479105,2.277226174930846
PLUG,3.5997696e+22,840.0,NVIDIA A100,128.0,2021-04-19,Language,1.7214306941372366e+32,32.23588954242863,37.06379218466855
Meta Pseudo Labels,4.79e+22,264.0,Google TPU v3,1024.0,2021-03-01,Vision,6629003202874.633,12.82144822885166,8.2980622049184
Switch,8.22e+22,648.0,Google TPU v3,1024.0,2021-01-11,Language,1001205.6457564534,6.000523289911376,8.2980622049184
CLIP (ViT L/14@336px),1.05e+22,288.0,NVIDIA V100,256.0,2021-01-05,"Multimodal,Vision,Language,Video",1552288990.062534,9.190972577170843,2.277226174930846
DensePhrases,2.09952e+18,20.0,NVIDIA TITAN Xp,8.0,2020-12-23,Language,0.00017067364627642442,-3.767833533134322,-3.767833533134322
CPM-Large,1.8e+21,336.0,NVIDIA V100,64.0,2020-12-01,Language,440.0132001176544,2.6434657052413413,2.277226174930846
German ELECTRA Large,1.42829568e+21,168.0,Google TPU v3,64.0,2020-10-21,Language,744679001.3288596,8.871969107656206,8.2980622049184
GBERT-Large,2.2444646e+21,264.0,Google TPU v3,64.0,2020-10-21,Language,473886325.5730713,8.67567417691244,8.2980622049184
Conformer + Wav2vec 2.0 + Noisy Student,7.6e+21,168.0,Google TPU v3,256.0,2020-10-20,Speech,36511351179843.766,13.562427905233815,8.2980622049184
LUKE,1.75799808e+20,720.0,NVIDIA V100,16.0,2020-10-02,Language,4.3925270864484515e-06,-5.357285551754392,2.277226174930846
ProBERTa,9.72e+18,18.0,NVIDIA V100,4.0,2020-09-01,Biology,202629445547.55057,11.306702556076639,2.277226174930846
GShard (dense),1.3702e+23,1008.0,Google TPU v3,1024.0,2020-06-30,Language,7558451.885524184,6.878432852852629,8.2980622049184
GPT-3 175B (davinci),3.14e+23,355.2,NVIDIA V100,10000.0,2020-05-28,Language,9.816800926466982,0.9919699840868219,2.277226174930846
UnifiedQA,1.65e+19,36.0,Google TPU v3,8.0,2020-05-02,Language,21.3852501552658,1.3301143348086886,8.2980622049184
ALBERT-xxlarge,2.39e+21,32.0,Google TPU v3,512.0,2020-02-09,Language,18256963643.24722,10.261428550710193,8.2980622049184
Meena,1.12e+23,720.0,Google TPU v3,1024.0,2020-01-28,Language,2169435689.319119,9.336346780444579,8.2980622049184
DD-PPO,7.8e+20,66.0,NVIDIA V100,64.0,2019-12-19,Robotics,3381057648993306.5,15.529052575801627,2.277226174930846
Noisy Student (L2),8.4934656e+20,144.0,Google TPU v3,1024.0,2019-11-11,Vision,1.7922561971072178e-08,-7.746599909273735,8.2980622049184
AlphaStar,5.9250000000001e+22,1056.0,Google TPU v3,384.0,2019-10-30,Games,788605092.5284972,8.896859577264067,8.2980622049184
T5-11B,3.3e+22,481.9,Google TPU v3,512.0,2019-10-23,Language,41760061.84977816,7.620761133217325,8.2980622049184
Megatron-LM (8.3B),9.1e+21,327.0,NVIDIA V100,512.0,2019-09-17,Language,0.016262156010864025,-1.7888218768508712,2.277226174930846
Megatron-BERT,2.2e+22,374.0,NVIDIA V100,512.0,2019-09-17,Language,285654.3072035603,5.455840776902207,2.277226174930846
RoBERTa Large,4.15383552e+21,120.0,NVIDIA V100,1024.0,2019-07-01,Language,7.351633770119388e-05,-4.133616135949728,2.277226174930846
MnasNet-A3,1.5e+21,108.0,Google TPU v3,256.0,2019-05-29,Vision,0.09579060805169663,-1.0186770699535619,8.2980622049184
MnasNet-A1 + SSDLite,1.5e+21,108.0,Google TPU v3,256.0,2019-05-29,Vision,0.09579060805169663,-1.0186770699535619,8.2980622049184
WeNet (Penn Treebank),7.30000001e+17,24.0,NVIDIA V100,1.0,2019-04-08,Language,0.14682814170272046,-0.8331906977245231,2.277226174930846
SciBERT,8.926848e+19,168.0,Google TPU v3,4.0,2019-03-26,Language,11914864021.261753,10.07608909031213,8.2980622049184
Mesh-TensorFlow Transformer 4.9B (language),1.617408e+20,13.0,Google TPU v2,256.0,2018-11-05,Language,0.16549343645537457,-0.7812192258625107,-0.8344506456150862
Mesh-TensorFlow Transformer 2.9B (translation),6.84288e+19,22.0,Google TPU v2,64.0,2018-11-05,Language,0.3911663043490592,-0.4076385630499266,-0.8344506456150862
BERT-Large,2.85e+20,96.0,Google TPU v2,64.0,2018-10-11,Language,0.04847366443497573,-1.3144941479328212,-0.8344506456150862
Transformer (Adaptive Input Embeddings) WT103,7.2e+19,67.0,NVIDIA V100,64.0,2018-09-28,Language,1.82440570405531e-05,-4.738878578595168,2.277226174930846
BigGAN-deep 512x512,1.8e+21,48.0,Google TPU v3,256.0,2018-09-28,Image generation,28840214497.485893,10.459998486100377,8.2980622049184
Big Transformer for Back-Translation,1.080843264e+20,27.666,NVIDIA V100,128.0,2018-08-28,Language,0.0010245298069101508,-2.989475402037512,2.277226174930846
GPT-1,1.7578125e+19,720.0,NVIDIA Quadro P600,8.0,2018-06-01,Language,1.4449256920236726e-07,-6.840154486730186,-6.840154486730186
"LSTM (Hebbian, Cache, MbPA)",2.4e+19,144.0,NVIDIA P100,8.0,2018-03-27,Language,0.00021182251857168832,-3.6740278725025037,-4.16559617151278
AmoebaNet-A (F=448),3.85296912e+20,168.0,NVIDIA Tesla K40,450.0,2018-02-05,Vision,2.718490467093702e-08,-7.56567218559619,-7.259197518717148
AlphaZero,3.667927300468287e+22,24.0,"Google TPU v2,Google TPU v1",5064.0,2017-12-05,Games,1.5898328664718033e+28,28.20135147087126,28.20135147087126
RetinaNet-R101,2.065392e+18,35.0,NVIDIA M40,8.0,2017-08-07,Vision,1.6634926658000608e-05,-4.778979109558961,-4.778979109558961
JFT,8.43e+20,1440.0,NVIDIA Tesla K80,50.0,2017-07-10,Vision,2.5029058514614125e-07,-6.6015554863725185,-6.596338343261943
Transformer,7.4245248e+18,84.0,NVIDIA P100,8.0,2017-06-12,Language,2.2020923565072604e-05,-4.657164470523056,-4.16559617151278
MoE-Multi,9.393905664e+19,288.0,NVIDIA Tesla K40,64.0,2017-01-23,Language,1.1150058556438936e-07,-6.952722851838105,-7.259197518717148
BIDAF,3.4686144e+18,60.0,NVIDIA GTX Titan,8.0,2016-11-05,Language,9.225678311780868e-06,-5.0350016927858645,-4.204724192051861
Xception,4.36e+20,720.0,NVIDIA Tesla K80,60.0,2016-10-07,Vision,2.563768456421432e-07,-6.591121200151366,-6.596338343261943
Part-of-sentence tagging model,1.454112e+17,12.0,NVIDIA GTX Titan,1.0,2016-05-29,Language,0.001695165231737706,-2.7707879636942105,-4.204724192051861
Named Entity Recognition model,9.69408e+16,8.0,NVIDIA GTX Titan,1.0,2016-05-29,Language,0.002542747847606626,-2.594696704638518,-4.204724192051861
DeepSpeech2 (English),2.6e+19,120.0,NVIDIA GTX Titan,16.0,2015-12-08,Speech,1.6068526853390717e-05,-4.794023937099115,-4.204724192051861
VGG16,1.2291e+19,504.0,NVIDIA GTX Titan,4.0,2014-09-04,Vision,1.4821403746641133e-06,-5.829110662041602,-4.204724192051861
