"Name","State","Notes","User","Tags","Created","Runtime","Sweep","activation_dim","auxk_alpha","decay_start","device","dict_class","dict_size","k","layer","lm_name","lr","seed","steps","submodule_name","threshold_beta","threshold_start_step","trainer_class","wandb_name","warmup_steps","auxk_loss","dead_features","effective_l0","frac_variance_explained","l0","l2_loss","loss","pre_norm_auxk_loss"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_11_trainer_3","finished","-","","","2025-12-11T09:11:04.000Z","14031","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","320","11","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_11","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_11_trainer_3","1000","0","0","320","0.8986561894416809","320","0.025930050760507584","0.025930050760507584","-1"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_11_trainer_2","finished","-","","","2025-12-11T09:10:35.000Z","14060","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","160","11","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_11","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_11_trainer_2","1000","1.0004969835281372","13","160","0.880593478679657","160","0.030551541596651077","0.061817072331905365","0.030551541596651077"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_1_trainer_1","finished","-","","","2025-12-11T09:10:35.000Z","8848","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","80","1","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_1","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_1_trainer_1","1000","1.0005686283111572","41","80","0.9727041721343994","79.99853515625","0.004078705795109272","0.03534647449851036","0.004078705795109272"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_1_trainer_2","finished","-","","","2025-12-11T09:10:35.000Z","8718","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","160","1","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_1","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_1_trainer_2","1000","1.0005747079849243","5","160","0.979300320148468","159.806640625","0.003093091305345297","0.03436104953289032","0.003093091305345297"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_6_trainer_2","finished","-","","","2025-12-11T09:10:34.000Z","11533","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","160","6","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_6","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_6_trainer_2","1000","1.0004929304122925","3","160","0.929234504699707","160","0.014858055859804152","0.04612345993518829","0.014858055859804152"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_26_trainer_4","finished","-","","","2025-12-11T09:10:32.000Z","22209","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","520","26","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_26","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_26_trainer_4","1000","1.0005148649215698","23","520","0.9158118367195128","519.29931640625","0.05031690001487732","0.08158299326896667","0.05031690001487732"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_6_trainer_4","finished","-","","","2025-12-11T09:10:32.000Z","11460","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","520","6","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_6","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_6_trainer_4","1000","0","0","520","0.9478220343589784","520","0.010956020094454288","0.010956020094454288","-1"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_1_trainer_4","finished","-","","","2025-12-11T09:10:32.000Z","8721","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","520","1","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_1","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_1_trainer_4","1000","0","0","520","0.9909275770187378","519.91796875","0.0013559721410274506","0.0013559721410274506","-1"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_1_trainer_0","finished","-","","","2025-12-11T09:10:32.000Z","8720","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","50","1","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_1","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_1_trainer_0","1000","1.0005773305892944","178","50","0.968601942062378","50","0.004691730253398418","0.035959772765636444","0.004691730253398418"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_11_trainer_0","finished","-","","","2025-12-11T09:10:32.000Z","14063","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","50","11","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_11","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_11_trainer_0","1000","1.0004807710647583","323","50","0.8468867540359497","50","0.039175137877464294","0.07044015824794769","0.03917515650391579"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_16_trainer_3","finished","-","","","2025-12-11T09:10:32.000Z","16893","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","320","16","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_16","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_16_trainer_3","1000","1.0005300045013428","7","320","0.9111299514770508","320","0.0371464341878891","0.06841299682855606","0.0371464341878891"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_16_trainer_5","finished","-","","","2025-12-11T09:10:32.000Z","16894","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","820","16","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_16","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_16_trainer_5","1000","1.0005762577056885","47","820","0.9378958344459534","820","0.025959869846701625","0.057227879762649536","0.025959869846701625"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_30_trainer_5","finished","-","","","2025-12-11T09:10:32.000Z","24415","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","820","30","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_30","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_30_trainer_5","1000","0","0","820","0.904727816581726","820","0.0509878545999527","0.0509878545999527","-1"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_30_trainer_0","finished","-","","","2025-12-11T09:10:32.000Z","24285","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","50","30","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_30","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_30_trainer_0","1000","1.0005072355270386","533","50","0.8048878312110901","50","0.10441750288009644","0.1356833577156067","0.10441745072603226"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_16_trainer_4","finished","-","","","2025-12-11T09:10:32.000Z","16893","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","520","16","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_16","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_16_trainer_4","1000","1.0005491971969604","7","520","0.9235983490943908","520","0.0319354385137558","0.06320260465145111","0.0319354385137558"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_30_trainer_1","finished","-","","","2025-12-11T09:10:32.000Z","24285","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","80","30","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_30","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_30_trainer_1","1000","1.000504732131958","21","80","0.8227360248565674","80","0.09486545622348784","0.12613123655319214","0.09486545622348784"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_11_trainer_4","finished","-","","","2025-12-11T09:10:32.000Z","14063","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","520","11","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_11","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_11_trainer_4","1000","0","0","520","0.912599503993988","520","0.02236315608024597","0.02236315608024597","-1"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_11_trainer_5","finished","-","","","2025-12-11T09:10:32.000Z","14063","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","820","11","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_11","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_11_trainer_5","1000","0","0","820","0.9277659058570862","820","0.01848287507891655","0.01848287507891655","-1"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_30_trainer_4","finished","-","","","2025-12-11T09:10:32.000Z","24415","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","520","30","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_30","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_30_trainer_4","1000","0","0","520","0.8858898282051086","520","0.061068981885910034","0.061068981885910034","-1"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_16_trainer_2","finished","-","","","2025-12-11T09:10:32.000Z","16894","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","160","16","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_16","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_16_trainer_2","1000","1.0005141496658323","24","160","0.8929083347320557","160","0.04476212337613106","0.07602819055318832","0.04476212337613106"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_26_trainer_5","finished","-","","","2025-12-11T09:10:32.000Z","22208","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","820","26","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_26","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_26_trainer_5","1000","1.0005550384521484","80","820","0.930059015750885","817.7392578125","0.04180343449115753","0.07307077944278717","0.04180343449115753"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_30_trainer_2","finished","-","","","2025-12-11T09:10:32.000Z","24285","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","160","30","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_30","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_30_trainer_2","1000","1.0005090236663818","5","160","0.8456177711486816","160","0.08262030780315399","0.11388621479272842","0.08262030780315399"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_26_trainer_3","finished","-","","","2025-12-11T09:10:32.000Z","22208","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","320","26","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_26","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_26_trainer_3","1000","1.000486969947815","6","320","0.9014846682548524","319.4267578125","0.058878205716609955","0.09014342725276948","0.058878205716609955"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_26_trainer_1","finished","-","","","2025-12-11T09:10:32.000Z","22209","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","80","26","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_26","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_26_trainer_1","1000","1.0004656314849854","978","80","0.8550389409065247","79.99609375","0.08663490414619446","0.11789945513010024","0.08663490414619446"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_16_trainer_0","finished","-","","","2025-12-11T09:10:32.000Z","16958","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","50","16","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_16","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_16_trainer_0","1000","1.0005016326904297","540","50","0.8570132255554199","50","0.059765368700027466","0.0910310447216034","0.059764787554740906"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_6_trainer_3","finished","-","","","2025-12-11T09:10:32.000Z","11503","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","320","6","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_6","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_6_trainer_3","1000","1.0005100965499878","3","320","0.9395745992660522","320","0.012687252834439278","0.043953195214271545","0.012687252834439278"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_26_trainer_2","finished","-","","","2025-12-11T09:10:32.000Z","22208","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","160","26","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_26","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_26_trainer_2","1000","1.000492811203003","24","160","0.8796691298484802","159.91748046875","0.07191678881645203","0.10318218916654588","0.07191678881645203"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_6_trainer_0","finished","-","","","2025-12-11T09:10:32.000Z","11460","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","50","6","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_6","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_6_trainer_0","1000","1.0005061626434326","93","50","0.909528374671936","50","0.01899584010243416","0.05026165768504143","0.01899584010243416"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_1_trainer_5","finished","-","","","2025-12-11T09:10:32.000Z","8721","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","820","1","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_1","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_1_trainer_5","1000","0","0","820","0.9941165447235109","819.47998046875","0.0008796387119218707","0.0008796387119218707","-1"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_11_trainer_1","finished","-","","","2025-12-11T09:10:32.000Z","14063","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","80","11","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_11","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_11_trainer_1","1000","1.0004788637161257","22","80","0.8613290786743164","80","0.035479914397001266","0.06674487888813019","0.035479914397001266"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_26_trainer_0","finished","-","","","2025-12-11T09:10:32.000Z","22338","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","50","26","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_26","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_26_trainer_0","1000","1.0004748106002808","3498","50","0.8322882652282715","49.99951171875","0.10023286193609238","0.13149769604206085","0.10023258626461028"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_16_trainer_1","finished","-","","","2025-12-11T09:10:32.000Z","16895","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","80","16","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_16","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_16_trainer_1","1000","1.0005276203155518","157","80","0.8736594319343567","80","0.05280847847461701","0.084074966609478","0.05280847847461701"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_30_trainer_3","finished","-","","","2025-12-11T09:10:32.000Z","24287","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","320","30","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_30","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_30_trainer_3","1000","1.0005040168762207","2","320","0.8687350749969482","320","0.0702483281493187","0.1015140786767006","0.0702483281493187"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_6_trainer_5","finished","-","","","2025-12-11T09:10:32.000Z","11591","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","820","6","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_6","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_6_trainer_5","1000","0","0","820","0.956932246685028","820","0.009043281897902489","0.009043281897902489","-1"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_6_trainer_1","finished","-","","","2025-12-11T09:10:32.000Z","11722","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","80","6","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_6","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_6_trainer_1","1000","1.0005041360855105","10","80","0.9184143543243408","80","0.017130058258771896","0.04839581251144409","0.017130058258771896"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_1_trainer_3","finished","-","","","2025-12-11T09:10:32.000Z","8720","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","320","1","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_1","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_1_trainer_3","1000","0","0","320","0.9866312146186828","319.70849609375","0.001997709507122636","0.001997709507122636","-1"