- Code release: https://github.com/microsoft/torchscale
- March 2022: released the preprint [DeepNet: Scaling Transformers to 1,000 Layers](https://arxiv.org/abs/2203.00555) (a short DeepNorm sketch follows below)
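
The core recipe in the DeepNet paper is DeepNorm: scale the residual branch by a constant alpha before LayerNorm and down-scale the initialization of sublayer output projections by beta. Below is a minimal, illustrative PyTorch sketch of that residual, not the torchscale implementation; the class and helper names are made up for this example, and the constants assume the paper's encoder-only setting, alpha = (2N)^(1/4) and beta = (8N)^(-1/4) for an N-layer stack.

```python
import torch
import torch.nn as nn


class DeepNormResidual(nn.Module):
    """Toy DeepNorm residual: x <- LayerNorm(alpha * x + sublayer(x)).

    Illustrative only; `sublayer` can be any attention or feed-forward module.
    """

    def __init__(self, sublayer: nn.Module, dim: int, alpha: float):
        super().__init__()
        self.sublayer = sublayer
        self.norm = nn.LayerNorm(dim)
        self.alpha = alpha

    def forward(self, x):
        # Post-LN residual with the up-scaled skip connection.
        return self.norm(self.alpha * x + self.sublayer(x))


def encoder_only_constants(num_layers: int):
    """alpha/beta assumed from the paper's encoder-only setting:
    alpha = (2N)^(1/4), beta = (8N)^(-1/4). beta would scale the init of the
    sublayers' output projections (not shown in this sketch)."""
    alpha = (2 * num_layers) ** 0.25
    beta = (8 * num_layers) ** -0.25
    return alpha, beta


# Example: wrap a feed-forward block for a hypothetical 24-layer encoder.
alpha, _beta = encoder_only_constants(24)
ffn = nn.Sequential(nn.Linear(512, 2048), nn.GELU(), nn.Linear(2048, 512))
block = DeepNormResidual(ffn, dim=512, alpha=alpha)
out = block(torch.randn(2, 16, 512))
```

For the full recipe (including the beta-scaled initialization and the encoder-decoder constants), see the torchscale code release linked above.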
@article{deepnet,
  author  = {Hongyu Wang and Shuming Ma and Li Dong and Shaohan Huang and Dongdong Zhang and Furu Wei},
  title   = {{DeepNet}: Scaling {Transformers} to 1,000 Layers},
  journal = {CoRR},
  volume  = {abs/2203.00555},
  year    = {2022},
}