% ICLR 2026 Conference Bibliography
% Save this as: iclr2026_conference.bib

% Conference paper in the NeurIPS proceedings (volume 33). Typed as
% inproceedings so the venue is carried by booktitle rather than journal.
@inproceedings{brown2020language,
  author    = {Brown, Tom and Mann, Benjamin and Ryder, Nick and Subbiah, Melanie and Kaplan, Jared D. and Dhariwal, Prafulla and Neelakantan, Arvind and Shyam, Pranav and Sastry, Girish and Askell, Amanda and others},
  title     = {Language Models are Few-Shot Learners},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {33},
  pages     = {1877--1901},
  year      = {2020},
}

% Conference paper in the NeurIPS proceedings (volume 35). The author list is
% complete (nine authors), so no trailing "and others" is used.
@inproceedings{wei2022chain,
  author    = {Wei, Jason and Wang, Xuezhi and Schuurmans, Dale and Bosma, Maarten and Ichter, Brian and Xia, Fei and Chi, Ed and Le, Quoc V. and Zhou, Denny},
  title     = {Chain-of-Thought Prompting Elicits Reasoning in Large Language Models},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {35},
  pages     = {24824--24837},
  year      = {2022},
}

% Conference paper in the NeurIPS proceedings (volume 36). The method name is
% braced so sentence-casing styles keep its capitalisation.
@inproceedings{madaan2023self,
  author    = {Madaan, Aman and Tandon, Niket and Gupta, Prakhar and Hallinan, Skyler and Gao, Luyu and Wiegreffe, Sarah and Alon, Uri and Dziri, Nouha and Prabhumoye, Shrimai and Yang, Yiming and others},
  title     = {{Self-Refine}: Iterative Refinement with Self-Feedback},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {36},
  year      = {2023},
}

% arXiv preprint. The identifier lives in eprint/archiveprefix (plus a url
% fallback for styles without eprint support) instead of a journal field.
@misc{cobbe2021training,
  author        = {Cobbe, Karl and Kosaraju, Vineet and Bavarian, Mohammad and Chen, Mark and Jun, Heewoo and Kaiser, Lukasz and Plappert, Matthias and Tworek, Jerry and Hilton, Jacob and Nakano, Reiichiro and others},
  title         = {Training Verifiers to Solve Math Word Problems},
  year          = {2021},
  eprint        = {2110.14168},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2110.14168},
}

% arXiv preprint. The identifier lives in eprint/archiveprefix (plus a url
% fallback for styles without eprint support) instead of a journal field.
@misc{lightman2023lets,
  author        = {Lightman, Hunter and Kosaraju, Vineet and Burda, Yura and Edwards, Harri and Baker, Bowen and Lee, Teddy and Leike, Jan and Schulman, John and Sutskever, Ilya and Cobbe, Karl},
  title         = {Let's Verify Step by Step},
  year          = {2023},
  eprint        = {2305.20050},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2305.20050},
}

% Conference paper in the NeurIPS proceedings (volume 36). Volume 36 is the
% 2023 edition, so the year is 2023; the citation key is kept unchanged so
% existing cite commands still resolve.
@inproceedings{yao2024tree,
  author    = {Yao, Shunyu and Yu, Dian and Zhao, Jeffrey and Shafran, Izhak and Griffiths, Thomas L. and Cao, Yuan and Narasimhan, Karthik},
  title     = {Tree of Thoughts: Deliberate Problem Solving with Large Language Models},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {36},
  year      = {2023},
}

% Conference paper in the NeurIPS proceedings (volume 35). The mixed-case
% method name is braced so sentence-casing styles do not flatten it.
@inproceedings{zelikman2022star,
  author    = {Zelikman, Eric and Wu, Yuhuai and Mu, Jesse and Goodman, Noah},
  title     = {{STaR}: Bootstrapping Reasoning with Reasoning},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {35},
  pages     = {15476--15488},
  year      = {2022},
}

% arXiv preprint. The identifier lives in eprint/archiveprefix (plus a url
% fallback for styles without eprint support) instead of a journal field.
@misc{graves2016adaptive,
  author        = {Graves, Alex},
  title         = {Adaptive Computation Time for Recurrent Neural Networks},
  year          = {2016},
  eprint        = {1603.08983},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/1603.08983},
}

% arXiv preprint. The identifier lives in eprint/archiveprefix (plus a url
% fallback for styles without eprint support) instead of a journal field.
@misc{bengio2013estimating,
  author        = {Bengio, Yoshua and L{\'e}onard, Nicholas and Courville, Aaron},
  title         = {Estimating or Propagating Gradients Through Stochastic Neurons for Conditional Computation},
  year          = {2013},
  eprint        = {1308.3432},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/1308.3432},
}

% ICPR 2016 conference paper. The system name is braced to survive sentence
% casing, the initials are separated, and IEEE is recorded as the publisher.
@inproceedings{teerapittayanon2016branchynet,
  author    = {Teerapittayanon, Surat and McDanel, Bradley and Kung, H. T.},
  title     = {{BranchyNet}: Fast Inference via Early Exiting from Deep Neural Networks},
  booktitle = {23rd International Conference on Pattern Recognition (ICPR)},
  pages     = {2464--2469},
  year      = {2016},
  publisher = {IEEE},
}

% ICML 2019 conference paper. PMLR is recorded as the publisher rather than
% as a sponsoring organization.
@inproceedings{kaya2019shallow,
  author    = {Kaya, Yigitcan and Hong, Sanghyun and Dumitras, Tudor},
  title     = {Shallow-Deep Networks: Understanding and Mitigating Network Overthinking},
  booktitle = {International Conference on Machine Learning},
  pages     = {3301--3310},
  year      = {2019},
  publisher = {PMLR},
}

% arXiv preprint. The identifier lives in eprint/archiveprefix (plus a url
% fallback for styles without eprint support) instead of a journal field.
@misc{shazeer2017outrageously,
  author        = {Shazeer, Noam and Mirhoseini, Azalia and Maziarz, Krzysztof and Davis, Andy and Le, Quoc and Hinton, Geoffrey and Dean, Jeff},
  title         = {Outrageously Large Neural Networks: The Sparsely-Gated Mixture-of-Experts Layer},
  year          = {2017},
  eprint        = {1701.06538},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/1701.06538},
}

% Journal article (JMLR volume 23, issue 120). The title uses the plural
% "Switch Transformers".
@article{fedus2022switch,
  author  = {Fedus, William and Zoph, Barret and Shazeer, Noam},
  title   = {Switch Transformers: Scaling to Trillion Parameter Models with Simple and Efficient Sparsity},
  journal = {Journal of Machine Learning Research},
  volume  = {23},
  number  = {120},
  pages   = {1--39},
  year    = {2022},
}

% ACL 2020 conference paper. Model names are braced so sentence-casing styles
% do not render them as "Deebert" / "bert".
@inproceedings{xin2020deebert,
  author    = {Xin, Ji and Tang, Raphael and Lee, Jaejun and Yu, Yaoliang and Lin, Jimmy},
  title     = {{DeeBERT}: Dynamic Early Exiting for Accelerating {BERT} Inference},
  booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},
  pages     = {2246--2251},
  year      = {2020},
}

% Conference paper in the NeurIPS proceedings (volume 33). The acronym is
% braced so sentence-casing styles do not render it as "Bert".
@inproceedings{zhou2020bert,
  author    = {Zhou, Wangchunshu and Xu, Canwen and Ge, Tao and McAuley, Julian and Xu, Ke and Wei, Furu},
  title     = {{BERT} Loses Patience: Fast and Robust Inference with Early Exit},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {33},
  pages     = {18330--18341},
  year      = {2020},
}

% Book reference: two authors, published by Cambridge University Press in 2009.
@book{arora2009computational,
  author    = {Arora, Sanjeev and Barak, Boaz},
  title     = {Computational complexity: A modern approach},
  publisher = {Cambridge University Press},
  year      = {2009},
}

% Book reference. Middle initials carry periods so styles that print full
% given names do not emit a bare letter.
@book{embretson2000item,
  author    = {Embretson, Susan E. and Reise, Steven P.},
  title     = {Item Response Theory for Psychologists},
  publisher = {Lawrence Erlbaum Associates},
  year      = {2000},
}

% Journal article in Computational Linguistics, volume 47, issue 1.
% The accented surname uses a braced accent command.
@article{martinc2021supervised,
  author    = {Martinc, Matej and Pollak, Senja and Robnik-{\v{S}}ikonja, Marko},
  title     = {Supervised and unsupervised neural approaches to text readability},
  journal   = {Computational Linguistics},
  publisher = {MIT Press},
  volume    = {47},
  number    = {1},
  pages     = {141--179},
  year      = {2021},
}

% EMNLP 2020 conference paper. The middle initial carries a period so styles
% that print full given names do not emit a bare letter.
@inproceedings{swayamdipta2020dataset,
  author    = {Swayamdipta, Swabha and Schwartz, Roy and Lourie, Nicholas and Wang, Yizhong and Hajishirzi, Hannaneh and Smith, Noah A. and Choi, Yejin},
  title     = {Dataset Cartography: Mapping and Diagnosing Datasets with Training Dynamics},
  booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
  pages     = {9275--9293},
  year      = {2020},
}

% Additional references for completeness (these are from the original template)
% Chapter in an edited collection (MIT Press, 2007). The acronym in the title
% is brace-protected.
@incollection{Bengio+chapter2007,
  author    = {Bengio, Yoshua and LeCun, Yann},
  title     = {Scaling Learning Algorithms Towards {AI}},
  booktitle = {Large Scale Kernel Machines},
  publisher = {MIT Press},
  year      = {2007},
}

% Journal article in Neural Computation, volume 18 (2006).
@article{Hinton06,
  author  = {Hinton, Geoffrey E. and Osindero, Simon and Teh, Yee Whye},
  title   = {A Fast Learning Algorithm for Deep Belief Nets},
  journal = {Neural Computation},
  volume  = {18},
  pages   = {1527--1554},
  year    = {2006},
}

% Book reference. Each author is listed once, and no volume is given because
% the book is a single volume.
@book{goodfellow2016deep,
  author    = {Goodfellow, Ian and Bengio, Yoshua and Courville, Aaron},
  title     = {Deep Learning},
  publisher = {MIT Press},
  year      = {2016},
}