"Name","State","Notes","User","Tags","Created","Runtime","Sweep","activation_dim","auxk_alpha","decay_start","device","dict_class","dict_size","k","layer","lm_name","lr","seed","steps","submodule_name","threshold_beta","threshold_start_step","trainer_class","wandb_name","warmup_steps","auxk_loss","dead_features","effective_l0","frac_variance_explained","l0","l2_loss","loss","pre_norm_auxk_loss"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_6_trainer_1","finished","-","","","2025-12-11T09:07:16.000Z","17483","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","80","6","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_6","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_6_trainer_1","1000","1.0005249977111816","40","80","0.8422123193740845","80","0.019570088014006615","0.05083649605512619","0.019570088014006615"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_26_trainer_5","finished","-","","","2025-12-11T09:07:15.000Z","43849","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","820","26","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_26","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_26_trainer_5","1000","1.0005303621292114","3","820","0.9012829661369324","820","0.05437375977635384","0.0856403335928917","0.05437375977635384"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_11_trainer_1","finished","-","","","2025-12-11T09:07:15.000Z","24044","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","80","11","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_11","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_11_trainer_1","1000","1.000508427619934","155","80","0.7812474966049194","80","0.0386669784784317","0.06993286311626434","0.0386669784784317"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_11_trainer_4","finished","-","","","2025-12-11T09:07:15.000Z","24044","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","520","11","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_11","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_11_trainer_4","1000","0","0","520","0.8678233623504639","520","0.023364273831248283","0.023364273831248283","-1"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_26_trainer_0","finished","-","","","2025-12-11T09:07:15.000Z","43716","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","50","26","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_26","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_26_trainer_0","1000","1.0005096197128296","1895","50","0.7535881996154785","50","0.13572335243225098","0.1669892817735672","0.1357218623161316"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_30_trainer_3","finished","-","","","2025-12-11T09:07:15.000Z","48704","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","320","30","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_30","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_30_trainer_3","1000","0","0","320","0.8482347726821899","320","0.07884827256202698","0.07884827256202698","-1"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_30_trainer_4","finished","-","","","2025-12-11T09:07:15.000Z","48704","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","520","30","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_30","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_30_trainer_4","1000","0","0","520","0.8697522878646851","520","0.06766977906227112","0.06766977906227112","-1"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_6_trainer_4","finished","-","","","2025-12-11T09:07:15.000Z","17484","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","520","6","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_6","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_6_trainer_4","1000","0","0","520","0.9053137302398682","520","0.011743995361030102","0.011743995361030102","-1"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_16_trainer_3","finished","-","","","2025-12-11T09:07:15.000Z","30638","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","320","16","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_16","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_16_trainer_3","1000","1.000519871711731","2","320","0.8277146220207214","320","0.04441872239112854","0.07568496465682983","0.04441872239112854"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_6_trainer_3","finished","-","","","2025-12-11T09:07:14.000Z","17485","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","320","6","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_6","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_6_trainer_3","1000","0","0","320","0.8888579607009888","320","0.013784630224108696","0.013784630224108696","-1"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_1_trainer_2","finished","-","","","2025-12-11T09:07:14.000Z","11216","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","160","1","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_1","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_1_trainer_2","1000","1.0005438327789309","185","160","0.9446834325790404","160","0.0027686827816069126","0.03403567895293236","0.0027686827816069126"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_6_trainer_2","finished","-","","","2025-12-11T09:07:14.000Z","17485","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","160","6","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_6","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_6_trainer_2","1000","1.000515699386597","8","160","0.8663761615753174","160","0.016572941094636917","0.04783905670046806","0.016572941094636917"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_1_trainer_1","finished","-","","","2025-12-11T09:07:14.000Z","11217","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","80","1","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_1","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_1_trainer_1","1000","1.0005593299865725","581","80","0.9237824082374572","80","0.0038148704916238785","0.03508234769105911","0.0038148704916238785"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_1_trainer_3","finished","-","","","2025-12-11T09:07:14.000Z","11216","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","320","1","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_1","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_1_trainer_3","1000","1.000641942024231","600","320","0.970188319683075","320","0.0014922693371772766","0.032762330025434494","0.0014922693371772766"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_30_trainer_0","finished","-","","","2025-12-11T09:07:14.000Z","48705","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","50","30","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_30","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_30_trainer_0","1000","1.0005375146865845","792","50","0.7610540390014648","50","0.12414628267288208","0.15541307628154755","0.12414595484733582"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_16_trainer_0","finished","-","","","2025-12-11T09:07:14.000Z","30639","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","50","16","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_16","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_16_trainer_0","1000","1.0004843473434448","547","50","0.710457444190979","50","0.07464957982301712","0.10591471195220949","0.07464736700057983"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_26_trainer_2","finished","-","","","2025-12-11T09:07:14.000Z","43717","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","160","26","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_26","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_26_trainer_2","1000","1.0005069971084597","48","160","0.8265218734741211","160","0.09555026143789291","0.12681610882282257","0.09555026143789291"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_1_trainer_0","finished","-","","","2025-12-11T09:07:14.000Z","11216","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","50","1","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_1","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_1_trainer_0","1000","1.0005760192871094","1138","50","0.9073431491851808","50","0.004637772683054209","0.03590577468276024","0.004637772683054209"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_11_trainer_0","finished","-","","","2025-12-11T09:07:14.000Z","24046","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","50","11","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_11","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_11_trainer_0","1000","1.000495433807373","427","50","0.7556782364845276","50","0.043186262249946594","0.074451744556427","0.04318607598543167"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_16_trainer_4","finished","-","","","2025-12-11T09:07:14.000Z","30639","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","520","16","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_16","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_16_trainer_4","1000","1.0005261898040771","1","520","0.8535748720169067","520","0.037751659750938416","0.06901810318231583","0.037751659750938416"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_26_trainer_4","finished","-","","","2025-12-11T09:07:14.000Z","43717","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","520","26","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_26","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_26_trainer_4","1000","0","0","520","0.8805025815963745","520","0.06581886112689972","0.06581886112689972","-1"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_16_trainer_5","finished","-","","","2025-12-11T09:07:14.000Z","30639","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","820","16","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_16","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_16_trainer_5","1000","1.0005276203155518","33","820","0.8799471855163574","820","0.030952323228120804","0.06221881136298179","0.030952323228120804"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_11_trainer_3","finished","-","","","2025-12-11T09:07:14.000Z","24045","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","320","11","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_11","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_11_trainer_3","1000","0","0","320","0.8455768823623657","320","0.02729632705450058","0.02729632705450058","-1"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_6_trainer_5","finished","-","","","2025-12-11T09:07:14.000Z","17485","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","820","6","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_6","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_6_trainer_5","1000","0","0","820","0.923255443572998","820","0.009519226849079132","0.009519226849079132","-1"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_30_trainer_5","finished","-","","","2025-12-11T09:07:14.000Z","48705","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","820","30","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_30","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_30_trainer_5","1000","0","0","820","0.891690731048584","820","0.056272681802511215","0.056272681802511215","-1"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_6_trainer_0","finished","-","","","2025-12-11T09:07:14.000Z","17485","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","50","6","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_6","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_6_trainer_0","1000","1.000515699386597","334","50","0.8217568397521973","50","0.02210693433880806","0.053373049944639206","0.02210693247616291"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_30_trainer_1","finished","-","","","2025-12-11T09:07:14.000Z","48705","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","80","30","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_30","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_30_trainer_1","1000","1.000522494316101","72","80","0.7871890664100647","80","0.1105656921863556","0.14183202385902405","0.1105656921863556"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_1_trainer_4","finished","-","","","2025-12-11T09:07:14.000Z","11348","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","520","1","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_1","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_1_trainer_4","1000","1.0010359287261963","9641","520","0.9822712540626526","520","0.0008877876680344343","0.03217016160488129","0.0008877869695425034"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_11_trainer_5","finished","-","","","2025-12-11T09:07:14.000Z","24045","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","820","11","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_11","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_11_trainer_5","1000","1.00053071975708","4","820","0.8916000127792358","820","0.019161352887749672","0.050427936017513275","0.019161352887749672"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_16_trainer_1","finished","-","","","2025-12-11T09:07:14.000Z","30640","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","80","16","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_16","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_16_trainer_1","1000","1.000507354736328","506","80","0.7459968328475952","80","0.06548646092414856","0.0967523157596588","0.06548641622066498"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_11_trainer_2","finished","-","","","2025-12-11T09:07:14.000Z","24045","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","160","11","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_11","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_11_trainer_2","1000","1.0005158185958862","2","160","0.8148918151855469","160","0.032720208168029785","0.06398633122444153","0.032720208168029785"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_1_trainer_5","finished","-","","","2025-12-11T09:07:14.000Z","11217","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","820","1","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_1","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_1_trainer_5","1000","1.00225031375885","12565","820","0.9912627935409546","820","0.00043805636232718825","0.0317583791911602","0.00043805636232718825"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_16_trainer_2","finished","-","","","2025-12-11T09:07:14.000Z","30639","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","160","16","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_16","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_16_trainer_2","1000","1.00049889087677","23","160","0.7888644337654114","160","0.05443394184112549","0.08569952845573425","0.05443394184112549"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_30_trainer_2","finished","-","","","2025-12-11T09:07:11.000Z","48708","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","160","30","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_30","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_30_trainer_2","1000","1.0005255937576294","2","160","0.8180047273635864","160","0.09455575793981552","0.12582218647003174","0.09455575793981552"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_26_trainer_3","finished","-","","","2025-12-11T09:07:11.000Z","43720","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","320","26","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_26","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_26_trainer_3","1000","1.0005102157592771","1","320","0.85929936170578","320","0.07749694585800171","0.10876289010047913","0.07749694585800171"
"TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_26_trainer_1","finished","-","","","2025-12-11T09:07:10.000Z","43721","","4096","0.03125","58593","cuda:0","AutoEncoderTopK","16384","80","26","GSAI-ML/LLaDA-8B-Base","0.0003","3407","73242","resid_post_layer_26","0.999","1000","TopKTrainer","TopKTrainer-GSAI-ML/LLaDA-8B-Base-resid_post_layer_26_trainer_1","1000","1.0004985332489014","1432","80","0.7882760763168335","80","0.11661496758461","0.14788055419921875","0.11661474406719208"